Commit feaf222925cdfbc841a695fd30df8c6d0a694146
Exists in ti-lsk-linux-4.1.y and in 10 other branches
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o:
 "Ext4 bug fixes.  We also reserved code points for encryption and
  read-only images (for which the implementation is mostly just the
  reserved code point for a read-only feature :-)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix indirect punch hole corruption
  ext4: ignore journal checksum on remount; don't fail
  ext4: remove duplicate remount check for JOURNAL_CHECKSUM change
  ext4: fix mmap data corruption in nodelalloc mode when blocksize < pagesize
  ext4: support read-only images
  ext4: change to use setup_timer() instead of init_timer()
  ext4: reserve codepoints used by the ext4 encryption feature
  jbd2: complain about descriptor block checksum errors
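One of the five files changed by this pull (fs/ext4/super.c, not reproduced below) carries the setup_timer() conversion named in the shortlog. As a hedged, self-contained sketch of what that cleanup looks like: struct timer_list, init_timer(), and setup_timer() are mocked here so the example compiles in userspace, and the callback name print_daily_error_info is borrowed from ext4's error-report timer; only the before/after shape of the call sites reflects the actual commit.

    /* Userspace mock of the init_timer() -> setup_timer() cleanup pattern. */
    #include <stdio.h>

    struct timer_list {                          /* mock of the kernel struct */
            void (*function)(unsigned long);
            unsigned long data;
    };

    static void init_timer(struct timer_list *t)
    {
            t->function = NULL;
            t->data = 0;
    }

    /* setup_timer() folds the three-step initialization into one call. */
    static void setup_timer(struct timer_list *t,
                            void (*fn)(unsigned long), unsigned long data)
    {
            init_timer(t);
            t->function = fn;
            t->data = data;
    }

    static void print_daily_error_info(unsigned long arg)
    {
            printf("error report, argument %#lx\n", arg);
    }

    int main(void)
    {
            struct timer_list t;

            /* before the commit: init_timer() plus two field assignments */
            init_timer(&t);
            t.function = print_daily_error_info;
            t.data = 0x1000UL;

            /* after the commit: a single setup_timer() call */
            setup_timer(&t, print_daily_error_info, 0x1000UL);

            t.function(t.data);                  /* fire the callback once */
            return 0;
    }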
Showing 5 changed files (inline diff)
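The most visible change in the header below reserves on-disk flag bit 0x00000800, formerly EXT4_ECOMPR_FL, as EXT4_ENCRYPT_FL, and renames the matching bit index EXT4_INODE_ECOMPR to EXT4_INODE_ENCRYPT; the header's existing CHECK_FLAG_VALUE() machinery then enforces at compile time that the hex value and the bit index stay in sync. Here is a minimal userspace sketch of that consistency check, with C11 _Static_assert standing in for the kernel's BUILD_BUG_ON() (names and values are taken from the diff; the standalone framing is an assumption):

    /* Sketch of ext4's flag-consistency check, outside the kernel. */
    #define EXT4_ENCRYPT_FL 0x00000800   /* on-disk flag bit (was ECOMPR) */

    enum { EXT4_INODE_ENCRYPT = 11 };    /* bit index for atomic set/get */

    /* Compilation fails if the hex value and the bit index ever diverge:
     * 1 << 11 == 0x800, so this assertion holds. */
    #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
    _Static_assert(TEST_FLAG_VALUE(ENCRYPT), "EXT4_ENCRYPT_FL out of sync");

    int main(void) { return 0; }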
fs/ext4/ext4.h
1 | /* | 1 | /* |
2 | * ext4.h | 2 | * ext4.h |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1993, 1994, 1995 | 4 | * Copyright (C) 1992, 1993, 1994, 1995 |
5 | * Remy Card (card@masi.ibp.fr) | 5 | * Remy Card (card@masi.ibp.fr) |
6 | * Laboratoire MASI - Institut Blaise Pascal | 6 | * Laboratoire MASI - Institut Blaise Pascal |
7 | * Universite Pierre et Marie Curie (Paris VI) | 7 | * Universite Pierre et Marie Curie (Paris VI) |
8 | * | 8 | * |
9 | * from | 9 | * from |
10 | * | 10 | * |
11 | * linux/include/linux/minix_fs.h | 11 | * linux/include/linux/minix_fs.h |
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #ifndef _EXT4_H | 16 | #ifndef _EXT4_H |
17 | #define _EXT4_H | 17 | #define _EXT4_H |
18 | 18 | ||
19 | #include <linux/types.h> | 19 | #include <linux/types.h> |
20 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
21 | #include <linux/magic.h> | 21 | #include <linux/magic.h> |
22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
23 | #include <linux/quota.h> | 23 | #include <linux/quota.h> |
24 | #include <linux/rwsem.h> | 24 | #include <linux/rwsem.h> |
25 | #include <linux/rbtree.h> | 25 | #include <linux/rbtree.h> |
26 | #include <linux/seqlock.h> | 26 | #include <linux/seqlock.h> |
27 | #include <linux/mutex.h> | 27 | #include <linux/mutex.h> |
28 | #include <linux/timer.h> | 28 | #include <linux/timer.h> |
29 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
30 | #include <linux/blockgroup_lock.h> | 30 | #include <linux/blockgroup_lock.h> |
31 | #include <linux/percpu_counter.h> | 31 | #include <linux/percpu_counter.h> |
32 | #include <linux/ratelimit.h> | 32 | #include <linux/ratelimit.h> |
33 | #include <crypto/hash.h> | 33 | #include <crypto/hash.h> |
34 | #include <linux/falloc.h> | 34 | #include <linux/falloc.h> |
35 | #ifdef __KERNEL__ | 35 | #ifdef __KERNEL__ |
36 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * The fourth extended filesystem constants/structures | 40 | * The fourth extended filesystem constants/structures |
41 | */ | 41 | */ |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Define EXT4FS_DEBUG to produce debug messages | 44 | * Define EXT4FS_DEBUG to produce debug messages |
45 | */ | 45 | */ |
46 | #undef EXT4FS_DEBUG | 46 | #undef EXT4FS_DEBUG |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * Debug code | 49 | * Debug code |
50 | */ | 50 | */ |
51 | #ifdef EXT4FS_DEBUG | 51 | #ifdef EXT4FS_DEBUG |
52 | #define ext4_debug(f, a...) \ | 52 | #define ext4_debug(f, a...) \ |
53 | do { \ | 53 | do { \ |
54 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 54 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
55 | __FILE__, __LINE__, __func__); \ | 55 | __FILE__, __LINE__, __func__); \ |
56 | printk(KERN_DEBUG f, ## a); \ | 56 | printk(KERN_DEBUG f, ## a); \ |
57 | } while (0) | 57 | } while (0) |
58 | #else | 58 | #else |
59 | #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) | 59 | #define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
60 | #endif | 60 | #endif |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * Turn on EXT_DEBUG to get lots of info about extents operations. | 63 | * Turn on EXT_DEBUG to get lots of info about extents operations. |
64 | */ | 64 | */ |
65 | #define EXT_DEBUG__ | 65 | #define EXT_DEBUG__ |
66 | #ifdef EXT_DEBUG | 66 | #ifdef EXT_DEBUG |
67 | #define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) | 67 | #define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__) |
68 | #else | 68 | #else |
69 | #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) | 69 | #define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
70 | #endif | 70 | #endif |
71 | 71 | ||
72 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 72 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
73 | ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) | 73 | ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) |
74 | 74 | ||
75 | #define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ | 75 | #define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ |
76 | ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) | 76 | ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) |
77 | 77 | ||
78 | #define EXT4_ERROR_FILE(file, block, fmt, a...) \ | 78 | #define EXT4_ERROR_FILE(file, block, fmt, a...) \ |
79 | ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) | 79 | ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a) |
80 | 80 | ||
81 | /* data type for block offset of block group */ | 81 | /* data type for block offset of block group */ |
82 | typedef int ext4_grpblk_t; | 82 | typedef int ext4_grpblk_t; |
83 | 83 | ||
84 | /* data type for filesystem-wide blocks number */ | 84 | /* data type for filesystem-wide blocks number */ |
85 | typedef unsigned long long ext4_fsblk_t; | 85 | typedef unsigned long long ext4_fsblk_t; |
86 | 86 | ||
87 | /* data type for file logical block number */ | 87 | /* data type for file logical block number */ |
88 | typedef __u32 ext4_lblk_t; | 88 | typedef __u32 ext4_lblk_t; |
89 | 89 | ||
90 | /* data type for block group number */ | 90 | /* data type for block group number */ |
91 | typedef unsigned int ext4_group_t; | 91 | typedef unsigned int ext4_group_t; |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Flags used in mballoc's allocation_context flags field. | 94 | * Flags used in mballoc's allocation_context flags field. |
95 | * | 95 | * |
96 | * Also used to show what's going on for debugging purposes when the | 96 | * Also used to show what's going on for debugging purposes when the |
97 | * flag field is exported via the traceport interface | 97 | * flag field is exported via the traceport interface |
98 | */ | 98 | */ |
99 | 99 | ||
100 | /* prefer goal again. length */ | 100 | /* prefer goal again. length */ |
101 | #define EXT4_MB_HINT_MERGE 0x0001 | 101 | #define EXT4_MB_HINT_MERGE 0x0001 |
102 | /* blocks already reserved */ | 102 | /* blocks already reserved */ |
103 | #define EXT4_MB_HINT_RESERVED 0x0002 | 103 | #define EXT4_MB_HINT_RESERVED 0x0002 |
104 | /* metadata is being allocated */ | 104 | /* metadata is being allocated */ |
105 | #define EXT4_MB_HINT_METADATA 0x0004 | 105 | #define EXT4_MB_HINT_METADATA 0x0004 |
106 | /* first blocks in the file */ | 106 | /* first blocks in the file */ |
107 | #define EXT4_MB_HINT_FIRST 0x0008 | 107 | #define EXT4_MB_HINT_FIRST 0x0008 |
108 | /* search for the best chunk */ | 108 | /* search for the best chunk */ |
109 | #define EXT4_MB_HINT_BEST 0x0010 | 109 | #define EXT4_MB_HINT_BEST 0x0010 |
110 | /* data is being allocated */ | 110 | /* data is being allocated */ |
111 | #define EXT4_MB_HINT_DATA 0x0020 | 111 | #define EXT4_MB_HINT_DATA 0x0020 |
112 | /* don't preallocate (for tails) */ | 112 | /* don't preallocate (for tails) */ |
113 | #define EXT4_MB_HINT_NOPREALLOC 0x0040 | 113 | #define EXT4_MB_HINT_NOPREALLOC 0x0040 |
114 | /* allocate for locality group */ | 114 | /* allocate for locality group */ |
115 | #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 | 115 | #define EXT4_MB_HINT_GROUP_ALLOC 0x0080 |
116 | /* allocate goal blocks or none */ | 116 | /* allocate goal blocks or none */ |
117 | #define EXT4_MB_HINT_GOAL_ONLY 0x0100 | 117 | #define EXT4_MB_HINT_GOAL_ONLY 0x0100 |
118 | /* goal is meaningful */ | 118 | /* goal is meaningful */ |
119 | #define EXT4_MB_HINT_TRY_GOAL 0x0200 | 119 | #define EXT4_MB_HINT_TRY_GOAL 0x0200 |
120 | /* blocks already pre-reserved by delayed allocation */ | 120 | /* blocks already pre-reserved by delayed allocation */ |
121 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 | 121 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 |
122 | /* We are doing stream allocation */ | 122 | /* We are doing stream allocation */ |
123 | #define EXT4_MB_STREAM_ALLOC 0x0800 | 123 | #define EXT4_MB_STREAM_ALLOC 0x0800 |
124 | /* Use reserved root blocks if needed */ | 124 | /* Use reserved root blocks if needed */ |
125 | #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 | 125 | #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 |
126 | /* Use blocks from reserved pool */ | 126 | /* Use blocks from reserved pool */ |
127 | #define EXT4_MB_USE_RESERVED 0x2000 | 127 | #define EXT4_MB_USE_RESERVED 0x2000 |
128 | 128 | ||
129 | struct ext4_allocation_request { | 129 | struct ext4_allocation_request { |
130 | /* target inode for block we're allocating */ | 130 | /* target inode for block we're allocating */ |
131 | struct inode *inode; | 131 | struct inode *inode; |
132 | /* how many blocks we want to allocate */ | 132 | /* how many blocks we want to allocate */ |
133 | unsigned int len; | 133 | unsigned int len; |
134 | /* logical block in target inode */ | 134 | /* logical block in target inode */ |
135 | ext4_lblk_t logical; | 135 | ext4_lblk_t logical; |
136 | /* the closest logical allocated block to the left */ | 136 | /* the closest logical allocated block to the left */ |
137 | ext4_lblk_t lleft; | 137 | ext4_lblk_t lleft; |
138 | /* the closest logical allocated block to the right */ | 138 | /* the closest logical allocated block to the right */ |
139 | ext4_lblk_t lright; | 139 | ext4_lblk_t lright; |
140 | /* phys. target (a hint) */ | 140 | /* phys. target (a hint) */ |
141 | ext4_fsblk_t goal; | 141 | ext4_fsblk_t goal; |
142 | /* phys. block for the closest logical allocated block to the left */ | 142 | /* phys. block for the closest logical allocated block to the left */ |
143 | ext4_fsblk_t pleft; | 143 | ext4_fsblk_t pleft; |
144 | /* phys. block for the closest logical allocated block to the right */ | 144 | /* phys. block for the closest logical allocated block to the right */ |
145 | ext4_fsblk_t pright; | 145 | ext4_fsblk_t pright; |
146 | /* flags. see above EXT4_MB_HINT_* */ | 146 | /* flags. see above EXT4_MB_HINT_* */ |
147 | unsigned int flags; | 147 | unsigned int flags; |
148 | }; | 148 | }; |
149 | 149 | ||
150 | /* | 150 | /* |
151 | * Logical to physical block mapping, used by ext4_map_blocks() | 151 | * Logical to physical block mapping, used by ext4_map_blocks() |
152 | * | 152 | * |
153 | * This structure is used to pass requests into ext4_map_blocks() as | 153 | * This structure is used to pass requests into ext4_map_blocks() as |
154 | * well as to store the information returned by ext4_map_blocks(). It | 154 | * well as to store the information returned by ext4_map_blocks(). It |
155 | * takes less room on the stack than a struct buffer_head. | 155 | * takes less room on the stack than a struct buffer_head. |
156 | */ | 156 | */ |
157 | #define EXT4_MAP_NEW (1 << BH_New) | 157 | #define EXT4_MAP_NEW (1 << BH_New) |
158 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) | 158 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) |
159 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) | 159 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) |
160 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) | 160 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) |
161 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ | 161 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ |
162 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) | 162 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) |
163 | 163 | ||
164 | struct ext4_map_blocks { | 164 | struct ext4_map_blocks { |
165 | ext4_fsblk_t m_pblk; | 165 | ext4_fsblk_t m_pblk; |
166 | ext4_lblk_t m_lblk; | 166 | ext4_lblk_t m_lblk; |
167 | unsigned int m_len; | 167 | unsigned int m_len; |
168 | unsigned int m_flags; | 168 | unsigned int m_flags; |
169 | }; | 169 | }; |
170 | 170 | ||
171 | /* | 171 | /* |
172 | * Flags for ext4_io_end->flags | 172 | * Flags for ext4_io_end->flags |
173 | */ | 173 | */ |
174 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 174 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
175 | 175 | ||
176 | /* | 176 | /* |
177 | * For converting unwritten extents on a work queue. 'handle' is used for | 177 | * For converting unwritten extents on a work queue. 'handle' is used for |
178 | * buffered writeback. | 178 | * buffered writeback. |
179 | */ | 179 | */ |
180 | typedef struct ext4_io_end { | 180 | typedef struct ext4_io_end { |
181 | struct list_head list; /* per-file finished IO list */ | 181 | struct list_head list; /* per-file finished IO list */ |
182 | handle_t *handle; /* handle reserved for extent | 182 | handle_t *handle; /* handle reserved for extent |
183 | * conversion */ | 183 | * conversion */ |
184 | struct inode *inode; /* file being written to */ | 184 | struct inode *inode; /* file being written to */ |
185 | struct bio *bio; /* Linked list of completed | 185 | struct bio *bio; /* Linked list of completed |
186 | * bios covering the extent */ | 186 | * bios covering the extent */ |
187 | unsigned int flag; /* unwritten or not */ | 187 | unsigned int flag; /* unwritten or not */ |
188 | loff_t offset; /* offset in the file */ | 188 | loff_t offset; /* offset in the file */ |
189 | ssize_t size; /* size of the extent */ | 189 | ssize_t size; /* size of the extent */ |
190 | atomic_t count; /* reference counter */ | 190 | atomic_t count; /* reference counter */ |
191 | } ext4_io_end_t; | 191 | } ext4_io_end_t; |
192 | 192 | ||
193 | struct ext4_io_submit { | 193 | struct ext4_io_submit { |
194 | int io_op; | 194 | int io_op; |
195 | struct bio *io_bio; | 195 | struct bio *io_bio; |
196 | ext4_io_end_t *io_end; | 196 | ext4_io_end_t *io_end; |
197 | sector_t io_next_block; | 197 | sector_t io_next_block; |
198 | }; | 198 | }; |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * Special inodes numbers | 201 | * Special inodes numbers |
202 | */ | 202 | */ |
203 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ | 203 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ |
204 | #define EXT4_ROOT_INO 2 /* Root inode */ | 204 | #define EXT4_ROOT_INO 2 /* Root inode */ |
205 | #define EXT4_USR_QUOTA_INO 3 /* User quota inode */ | 205 | #define EXT4_USR_QUOTA_INO 3 /* User quota inode */ |
206 | #define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */ | 206 | #define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */ |
207 | #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ | 207 | #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ |
208 | #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ | 208 | #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ |
209 | #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ | 209 | #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ |
210 | #define EXT4_JOURNAL_INO 8 /* Journal inode */ | 210 | #define EXT4_JOURNAL_INO 8 /* Journal inode */ |
211 | 211 | ||
212 | /* First non-reserved inode for old ext4 filesystems */ | 212 | /* First non-reserved inode for old ext4 filesystems */ |
213 | #define EXT4_GOOD_OLD_FIRST_INO 11 | 213 | #define EXT4_GOOD_OLD_FIRST_INO 11 |
214 | 214 | ||
215 | /* | 215 | /* |
216 | * Maximal count of links to a file | 216 | * Maximal count of links to a file |
217 | */ | 217 | */ |
218 | #define EXT4_LINK_MAX 65000 | 218 | #define EXT4_LINK_MAX 65000 |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * Macro-instructions used to manage several block sizes | 221 | * Macro-instructions used to manage several block sizes |
222 | */ | 222 | */ |
223 | #define EXT4_MIN_BLOCK_SIZE 1024 | 223 | #define EXT4_MIN_BLOCK_SIZE 1024 |
224 | #define EXT4_MAX_BLOCK_SIZE 65536 | 224 | #define EXT4_MAX_BLOCK_SIZE 65536 |
225 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 | 225 | #define EXT4_MIN_BLOCK_LOG_SIZE 10 |
226 | #define EXT4_MAX_BLOCK_LOG_SIZE 16 | 226 | #define EXT4_MAX_BLOCK_LOG_SIZE 16 |
227 | #ifdef __KERNEL__ | 227 | #ifdef __KERNEL__ |
228 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) | 228 | # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) |
229 | #else | 229 | #else |
230 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) | 230 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) |
231 | #endif | 231 | #endif |
232 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) | 232 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) |
233 | #define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \ | 233 | #define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \ |
234 | EXT4_SB(s)->s_cluster_bits) | 234 | EXT4_SB(s)->s_cluster_bits) |
235 | #ifdef __KERNEL__ | 235 | #ifdef __KERNEL__ |
236 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) | 236 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) |
237 | # define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits) | 237 | # define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits) |
238 | #else | 238 | #else |
239 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) | 239 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) |
240 | #endif | 240 | #endif |
241 | #ifdef __KERNEL__ | 241 | #ifdef __KERNEL__ |
242 | #define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) | 242 | #define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) |
243 | #define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) | 243 | #define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) |
244 | #define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) | 244 | #define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) |
245 | #else | 245 | #else |
246 | #define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ | 246 | #define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ |
247 | EXT4_GOOD_OLD_INODE_SIZE : \ | 247 | EXT4_GOOD_OLD_INODE_SIZE : \ |
248 | (s)->s_inode_size) | 248 | (s)->s_inode_size) |
249 | #define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ | 249 | #define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ |
250 | EXT4_GOOD_OLD_FIRST_INO : \ | 250 | EXT4_GOOD_OLD_FIRST_INO : \ |
251 | (s)->s_first_ino) | 251 | (s)->s_first_ino) |
252 | #endif | 252 | #endif |
253 | #define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) | 253 | #define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) |
254 | 254 | ||
255 | /* Translate a block number to a cluster number */ | 255 | /* Translate a block number to a cluster number */ |
256 | #define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits) | 256 | #define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits) |
257 | /* Translate a cluster number to a block number */ | 257 | /* Translate a cluster number to a block number */ |
258 | #define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits) | 258 | #define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits) |
259 | /* Translate # of blks to # of clusters */ | 259 | /* Translate # of blks to # of clusters */ |
260 | #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ | 260 | #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ |
261 | (sbi)->s_cluster_bits) | 261 | (sbi)->s_cluster_bits) |
262 | /* Mask out the low bits to get the starting block of the cluster */ | 262 | /* Mask out the low bits to get the starting block of the cluster */ |
263 | #define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ | 263 | #define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ |
264 | ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | 264 | ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) |
265 | #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ | 265 | #define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ |
266 | ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | 266 | ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) |
267 | /* Get the cluster offset */ | 267 | /* Get the cluster offset */ |
268 | #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ | 268 | #define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ |
269 | ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) | 269 | ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) |
270 | #define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ | 270 | #define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ |
271 | ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) | 271 | ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * Structure of a blocks group descriptor | 274 | * Structure of a blocks group descriptor |
275 | */ | 275 | */ |
276 | struct ext4_group_desc | 276 | struct ext4_group_desc |
277 | { | 277 | { |
278 | __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ | 278 | __le32 bg_block_bitmap_lo; /* Blocks bitmap block */ |
279 | __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ | 279 | __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */ |
280 | __le32 bg_inode_table_lo; /* Inodes table block */ | 280 | __le32 bg_inode_table_lo; /* Inodes table block */ |
281 | __le16 bg_free_blocks_count_lo;/* Free blocks count */ | 281 | __le16 bg_free_blocks_count_lo;/* Free blocks count */ |
282 | __le16 bg_free_inodes_count_lo;/* Free inodes count */ | 282 | __le16 bg_free_inodes_count_lo;/* Free inodes count */ |
283 | __le16 bg_used_dirs_count_lo; /* Directories count */ | 283 | __le16 bg_used_dirs_count_lo; /* Directories count */ |
284 | __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ | 284 | __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ |
285 | __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ | 285 | __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ |
286 | __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ | 286 | __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ |
287 | __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ | 287 | __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ |
288 | __le16 bg_itable_unused_lo; /* Unused inodes count */ | 288 | __le16 bg_itable_unused_lo; /* Unused inodes count */ |
289 | __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ | 289 | __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ |
290 | __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ | 290 | __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ |
291 | __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ | 291 | __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ |
292 | __le32 bg_inode_table_hi; /* Inodes table block MSB */ | 292 | __le32 bg_inode_table_hi; /* Inodes table block MSB */ |
293 | __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ | 293 | __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */ |
294 | __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ | 294 | __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ |
295 | __le16 bg_used_dirs_count_hi; /* Directories count MSB */ | 295 | __le16 bg_used_dirs_count_hi; /* Directories count MSB */ |
296 | __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ | 296 | __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ |
297 | __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ | 297 | __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ |
298 | __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ | 298 | __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ |
299 | __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ | 299 | __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ |
300 | __u32 bg_reserved; | 300 | __u32 bg_reserved; |
301 | }; | 301 | }; |
302 | 302 | ||
303 | #define EXT4_BG_INODE_BITMAP_CSUM_HI_END \ | 303 | #define EXT4_BG_INODE_BITMAP_CSUM_HI_END \ |
304 | (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \ | 304 | (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \ |
305 | sizeof(__le16)) | 305 | sizeof(__le16)) |
306 | #define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \ | 306 | #define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \ |
307 | (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \ | 307 | (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \ |
308 | sizeof(__le16)) | 308 | sizeof(__le16)) |
309 | 309 | ||
310 | /* | 310 | /* |
311 | * Structure of a flex block group info | 311 | * Structure of a flex block group info |
312 | */ | 312 | */ |
313 | 313 | ||
314 | struct flex_groups { | 314 | struct flex_groups { |
315 | atomic64_t free_clusters; | 315 | atomic64_t free_clusters; |
316 | atomic_t free_inodes; | 316 | atomic_t free_inodes; |
317 | atomic_t used_dirs; | 317 | atomic_t used_dirs; |
318 | }; | 318 | }; |
319 | 319 | ||
320 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ | 320 | #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ |
321 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ | 321 | #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ |
322 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ | 322 | #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * Macro-instructions used to manage group descriptors | 325 | * Macro-instructions used to manage group descriptors |
326 | */ | 326 | */ |
327 | #define EXT4_MIN_DESC_SIZE 32 | 327 | #define EXT4_MIN_DESC_SIZE 32 |
328 | #define EXT4_MIN_DESC_SIZE_64BIT 64 | 328 | #define EXT4_MIN_DESC_SIZE_64BIT 64 |
329 | #define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE | 329 | #define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE |
330 | #define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) | 330 | #define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) |
331 | #ifdef __KERNEL__ | 331 | #ifdef __KERNEL__ |
332 | # define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) | 332 | # define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) |
333 | # define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group) | 333 | # define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group) |
334 | # define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) | 334 | # define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) |
335 | # define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) | 335 | # define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) |
336 | # define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) | 336 | # define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) |
337 | #else | 337 | #else |
338 | # define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) | 338 | # define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) |
339 | # define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) | 339 | # define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) |
340 | # define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) | 340 | # define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) |
341 | #endif | 341 | #endif |
342 | 342 | ||
343 | /* | 343 | /* |
344 | * Constants relative to the data blocks | 344 | * Constants relative to the data blocks |
345 | */ | 345 | */ |
346 | #define EXT4_NDIR_BLOCKS 12 | 346 | #define EXT4_NDIR_BLOCKS 12 |
347 | #define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS | 347 | #define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS |
348 | #define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) | 348 | #define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) |
349 | #define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) | 349 | #define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) |
350 | #define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) | 350 | #define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) |
351 | 351 | ||
352 | /* | 352 | /* |
353 | * Inode flags | 353 | * Inode flags |
354 | */ | 354 | */ |
355 | #define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ | 355 | #define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ |
356 | #define EXT4_UNRM_FL 0x00000002 /* Undelete */ | 356 | #define EXT4_UNRM_FL 0x00000002 /* Undelete */ |
357 | #define EXT4_COMPR_FL 0x00000004 /* Compress file */ | 357 | #define EXT4_COMPR_FL 0x00000004 /* Compress file */ |
358 | #define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ | 358 | #define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ |
359 | #define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ | 359 | #define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ |
360 | #define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ | 360 | #define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ |
361 | #define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ | 361 | #define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ |
362 | #define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ | 362 | #define EXT4_NOATIME_FL 0x00000080 /* do not update atime */ |
363 | /* Reserved for compression usage... */ | 363 | /* Reserved for compression usage... */ |
364 | #define EXT4_DIRTY_FL 0x00000100 | 364 | #define EXT4_DIRTY_FL 0x00000100 |
365 | #define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ | 365 | #define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ |
366 | #define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ | 366 | #define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ |
367 | #define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ | 367 | /* nb: was previously EXT2_ECOMPR_FL */ |
368 | #define EXT4_ENCRYPT_FL 0x00000800 /* encrypted file */ | ||
368 | /* End compression flags --- maybe not all used */ | 369 | /* End compression flags --- maybe not all used */ |
369 | #define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ | 370 | #define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ |
370 | #define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ | 371 | #define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ |
371 | #define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ | 372 | #define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ |
372 | #define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ | 373 | #define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ |
373 | #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ | 374 | #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ |
374 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ | 375 | #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ |
375 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ | 376 | #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ |
376 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ | 377 | #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ |
377 | #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ | 378 | #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ |
378 | #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ | 379 | #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ |
379 | #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ | 380 | #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ |
380 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 381 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
381 | 382 | ||
382 | #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ | 383 | #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ |
383 | #define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */ | 384 | #define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */ |
384 | 385 | ||
385 | /* Flags that should be inherited by new inodes from their parent. */ | 386 | /* Flags that should be inherited by new inodes from their parent. */ |
386 | #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ | 387 | #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ |
387 | EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ | 388 | EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ |
388 | EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ | 389 | EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ |
389 | EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) | 390 | EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) |
390 | 391 | ||
391 | /* Flags that are appropriate for regular files (all but dir-specific ones). */ | 392 | /* Flags that are appropriate for regular files (all but dir-specific ones). */ |
392 | #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) | 393 | #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) |
393 | 394 | ||
394 | /* Flags that are appropriate for non-directories/regular files. */ | 395 | /* Flags that are appropriate for non-directories/regular files. */ |
395 | #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) | 396 | #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) |
396 | 397 | ||
397 | /* Mask out flags that are inappropriate for the given type of inode. */ | 398 | /* Mask out flags that are inappropriate for the given type of inode. */ |
398 | static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | 399 | static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) |
399 | { | 400 | { |
400 | if (S_ISDIR(mode)) | 401 | if (S_ISDIR(mode)) |
401 | return flags; | 402 | return flags; |
402 | else if (S_ISREG(mode)) | 403 | else if (S_ISREG(mode)) |
403 | return flags & EXT4_REG_FLMASK; | 404 | return flags & EXT4_REG_FLMASK; |
404 | else | 405 | else |
405 | return flags & EXT4_OTHER_FLMASK; | 406 | return flags & EXT4_OTHER_FLMASK; |
406 | } | 407 | } |
407 | 408 | ||
408 | /* | 409 | /* |
409 | * Inode flags used for atomic set/get | 410 | * Inode flags used for atomic set/get |
410 | */ | 411 | */ |
411 | enum { | 412 | enum { |
412 | EXT4_INODE_SECRM = 0, /* Secure deletion */ | 413 | EXT4_INODE_SECRM = 0, /* Secure deletion */ |
413 | EXT4_INODE_UNRM = 1, /* Undelete */ | 414 | EXT4_INODE_UNRM = 1, /* Undelete */ |
414 | EXT4_INODE_COMPR = 2, /* Compress file */ | 415 | EXT4_INODE_COMPR = 2, /* Compress file */ |
415 | EXT4_INODE_SYNC = 3, /* Synchronous updates */ | 416 | EXT4_INODE_SYNC = 3, /* Synchronous updates */ |
416 | EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ | 417 | EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ |
417 | EXT4_INODE_APPEND = 5, /* writes to file may only append */ | 418 | EXT4_INODE_APPEND = 5, /* writes to file may only append */ |
418 | EXT4_INODE_NODUMP = 6, /* do not dump file */ | 419 | EXT4_INODE_NODUMP = 6, /* do not dump file */ |
419 | EXT4_INODE_NOATIME = 7, /* do not update atime */ | 420 | EXT4_INODE_NOATIME = 7, /* do not update atime */ |
420 | /* Reserved for compression usage... */ | 421 | /* Reserved for compression usage... */ |
421 | EXT4_INODE_DIRTY = 8, | 422 | EXT4_INODE_DIRTY = 8, |
422 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ | 423 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ |
423 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ | 424 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ |
424 | EXT4_INODE_ECOMPR = 11, /* Compression error */ | 425 | EXT4_INODE_ENCRYPT = 11, /* Compression error */ |
425 | /* End compression flags --- maybe not all used */ | 426 | /* End compression flags --- maybe not all used */ |
426 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ | 427 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ |
427 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ | 428 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ |
428 | EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ | 429 | EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ |
429 | EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ | 430 | EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ |
430 | EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ | 431 | EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ |
431 | EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ | 432 | EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ |
432 | EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ | 433 | EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ |
433 | EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ | 434 | EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ |
434 | EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ | 435 | EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ |
435 | EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ | 436 | EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ |
436 | EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ | 437 | EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ |
437 | EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ | 438 | EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ |
438 | }; | 439 | }; |
439 | 440 | ||
440 | /* | 441 | /* |
441 | * Since it's pretty easy to mix up bit numbers and hex values, we use a | 442 | * Since it's pretty easy to mix up bit numbers and hex values, we use a |
442 | * build-time check to make sure that EXT4_XXX_FL is consistent with respect to | 443 | * build-time check to make sure that EXT4_XXX_FL is consistent with respect to |
443 | * EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost | 444 | * EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost |
444 | * any extra space in the compiled kernel image, otherwise, the build will fail. | 445 | * any extra space in the compiled kernel image, otherwise, the build will fail. |
445 | * It's important that these values are the same, since we are using | 446 | * It's important that these values are the same, since we are using |
446 | * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent | 447 | * EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent |
447 | * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk | 448 | * with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk |
448 | * values found in ext2, ext3 and ext4 filesystems, and of course the values | 449 | * values found in ext2, ext3 and ext4 filesystems, and of course the values |
449 | * defined in e2fsprogs. | 450 | * defined in e2fsprogs. |
450 | * | 451 | * |
451 | * It's not paranoia if the Murphy's Law really *is* out to get you. :-) | 452 | * It's not paranoia if the Murphy's Law really *is* out to get you. :-) |
452 | */ | 453 | */ |
453 | #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) | 454 | #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) |
454 | #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG)) | 455 | #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG)) |
455 | 456 | ||
456 | static inline void ext4_check_flag_values(void) | 457 | static inline void ext4_check_flag_values(void) |
457 | { | 458 | { |
458 | CHECK_FLAG_VALUE(SECRM); | 459 | CHECK_FLAG_VALUE(SECRM); |
459 | CHECK_FLAG_VALUE(UNRM); | 460 | CHECK_FLAG_VALUE(UNRM); |
460 | CHECK_FLAG_VALUE(COMPR); | 461 | CHECK_FLAG_VALUE(COMPR); |
461 | CHECK_FLAG_VALUE(SYNC); | 462 | CHECK_FLAG_VALUE(SYNC); |
462 | CHECK_FLAG_VALUE(IMMUTABLE); | 463 | CHECK_FLAG_VALUE(IMMUTABLE); |
463 | CHECK_FLAG_VALUE(APPEND); | 464 | CHECK_FLAG_VALUE(APPEND); |
464 | CHECK_FLAG_VALUE(NODUMP); | 465 | CHECK_FLAG_VALUE(NODUMP); |
465 | CHECK_FLAG_VALUE(NOATIME); | 466 | CHECK_FLAG_VALUE(NOATIME); |
466 | CHECK_FLAG_VALUE(DIRTY); | 467 | CHECK_FLAG_VALUE(DIRTY); |
467 | CHECK_FLAG_VALUE(COMPRBLK); | 468 | CHECK_FLAG_VALUE(COMPRBLK); |
468 | CHECK_FLAG_VALUE(NOCOMPR); | 469 | CHECK_FLAG_VALUE(NOCOMPR); |
469 | CHECK_FLAG_VALUE(ECOMPR); | 470 | CHECK_FLAG_VALUE(ENCRYPT); |
470 | CHECK_FLAG_VALUE(INDEX); | 471 | CHECK_FLAG_VALUE(INDEX); |
471 | CHECK_FLAG_VALUE(IMAGIC); | 472 | CHECK_FLAG_VALUE(IMAGIC); |
472 | CHECK_FLAG_VALUE(JOURNAL_DATA); | 473 | CHECK_FLAG_VALUE(JOURNAL_DATA); |
473 | CHECK_FLAG_VALUE(NOTAIL); | 474 | CHECK_FLAG_VALUE(NOTAIL); |
474 | CHECK_FLAG_VALUE(DIRSYNC); | 475 | CHECK_FLAG_VALUE(DIRSYNC); |
475 | CHECK_FLAG_VALUE(TOPDIR); | 476 | CHECK_FLAG_VALUE(TOPDIR); |
476 | CHECK_FLAG_VALUE(HUGE_FILE); | 477 | CHECK_FLAG_VALUE(HUGE_FILE); |
477 | CHECK_FLAG_VALUE(EXTENTS); | 478 | CHECK_FLAG_VALUE(EXTENTS); |
478 | CHECK_FLAG_VALUE(EA_INODE); | 479 | CHECK_FLAG_VALUE(EA_INODE); |
479 | CHECK_FLAG_VALUE(EOFBLOCKS); | 480 | CHECK_FLAG_VALUE(EOFBLOCKS); |
480 | CHECK_FLAG_VALUE(INLINE_DATA); | 481 | CHECK_FLAG_VALUE(INLINE_DATA); |
481 | CHECK_FLAG_VALUE(RESERVED); | 482 | CHECK_FLAG_VALUE(RESERVED); |
482 | } | 483 | } |
483 | 484 | ||
484 | /* Used to pass group descriptor data when online resize is done */ | 485 | /* Used to pass group descriptor data when online resize is done */ |
485 | struct ext4_new_group_input { | 486 | struct ext4_new_group_input { |
486 | __u32 group; /* Group number for this data */ | 487 | __u32 group; /* Group number for this data */ |
487 | __u64 block_bitmap; /* Absolute block number of block bitmap */ | 488 | __u64 block_bitmap; /* Absolute block number of block bitmap */ |
488 | __u64 inode_bitmap; /* Absolute block number of inode bitmap */ | 489 | __u64 inode_bitmap; /* Absolute block number of inode bitmap */ |
489 | __u64 inode_table; /* Absolute block number of inode table start */ | 490 | __u64 inode_table; /* Absolute block number of inode table start */ |
490 | __u32 blocks_count; /* Total number of blocks in this group */ | 491 | __u32 blocks_count; /* Total number of blocks in this group */ |
491 | __u16 reserved_blocks; /* Number of reserved blocks in this group */ | 492 | __u16 reserved_blocks; /* Number of reserved blocks in this group */ |
492 | __u16 unused; | 493 | __u16 unused; |
493 | }; | 494 | }; |
494 | 495 | ||
495 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 496 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
496 | struct compat_ext4_new_group_input { | 497 | struct compat_ext4_new_group_input { |
497 | u32 group; | 498 | u32 group; |
498 | compat_u64 block_bitmap; | 499 | compat_u64 block_bitmap; |
499 | compat_u64 inode_bitmap; | 500 | compat_u64 inode_bitmap; |
500 | compat_u64 inode_table; | 501 | compat_u64 inode_table; |
501 | u32 blocks_count; | 502 | u32 blocks_count; |
502 | u16 reserved_blocks; | 503 | u16 reserved_blocks; |
503 | u16 unused; | 504 | u16 unused; |
504 | }; | 505 | }; |
505 | #endif | 506 | #endif |
506 | 507 | ||
507 | /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ | 508 | /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ |
508 | struct ext4_new_group_data { | 509 | struct ext4_new_group_data { |
509 | __u32 group; | 510 | __u32 group; |
510 | __u64 block_bitmap; | 511 | __u64 block_bitmap; |
511 | __u64 inode_bitmap; | 512 | __u64 inode_bitmap; |
512 | __u64 inode_table; | 513 | __u64 inode_table; |
513 | __u32 blocks_count; | 514 | __u32 blocks_count; |
514 | __u16 reserved_blocks; | 515 | __u16 reserved_blocks; |
515 | __u16 unused; | 516 | __u16 unused; |
516 | __u32 free_blocks_count; | 517 | __u32 free_blocks_count; |
517 | }; | 518 | }; |
518 | 519 | ||
519 | /* Indexes used to index group tables in ext4_new_group_data */ | 520 | /* Indexes used to index group tables in ext4_new_group_data */ |
520 | enum { | 521 | enum { |
521 | BLOCK_BITMAP = 0, /* block bitmap */ | 522 | BLOCK_BITMAP = 0, /* block bitmap */ |
522 | INODE_BITMAP, /* inode bitmap */ | 523 | INODE_BITMAP, /* inode bitmap */ |
523 | INODE_TABLE, /* inode tables */ | 524 | INODE_TABLE, /* inode tables */ |
524 | GROUP_TABLE_COUNT, | 525 | GROUP_TABLE_COUNT, |
525 | }; | 526 | }; |
526 | 527 | ||
527 | /* | 528 | /* |
528 | * Flags used by ext4_map_blocks() | 529 | * Flags used by ext4_map_blocks() |
529 | */ | 530 | */ |
530 | /* Allocate any needed blocks and/or convert an unwritten | 531 | /* Allocate any needed blocks and/or convert an unwritten |
531 | extent to be an initialized ext4 */ | 532 | extent to be an initialized ext4 */ |
532 | #define EXT4_GET_BLOCKS_CREATE 0x0001 | 533 | #define EXT4_GET_BLOCKS_CREATE 0x0001 |
533 | /* Request the creation of an unwritten extent */ | 534 | /* Request the creation of an unwritten extent */ |
534 | #define EXT4_GET_BLOCKS_UNWRIT_EXT 0x0002 | 535 | #define EXT4_GET_BLOCKS_UNWRIT_EXT 0x0002 |
535 | #define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT (EXT4_GET_BLOCKS_UNWRIT_EXT|\ | 536 | #define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT (EXT4_GET_BLOCKS_UNWRIT_EXT|\ |
536 | EXT4_GET_BLOCKS_CREATE) | 537 | EXT4_GET_BLOCKS_CREATE) |
537 | /* Caller is from the delayed allocation writeout path | 538 | /* Caller is from the delayed allocation writeout path |
538 | * finally doing the actual allocation of delayed blocks */ | 539 | * finally doing the actual allocation of delayed blocks */ |
539 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 | 540 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 |
540 | /* caller is from the direct IO path, request to creation of an | 541 | /* caller is from the direct IO path, request to creation of an |
541 | unwritten extents if not allocated, split the unwritten | 542 | unwritten extents if not allocated, split the unwritten |
542 | extent if blocks has been preallocated already*/ | 543 | extent if blocks has been preallocated already*/ |
543 | #define EXT4_GET_BLOCKS_PRE_IO 0x0008 | 544 | #define EXT4_GET_BLOCKS_PRE_IO 0x0008 |
544 | #define EXT4_GET_BLOCKS_CONVERT 0x0010 | 545 | #define EXT4_GET_BLOCKS_CONVERT 0x0010 |
545 | #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ | 546 | #define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ |
546 | EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) | 547 | EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) |
547 | /* Convert extent to initialized after IO complete */ | 548 | /* Convert extent to initialized after IO complete */ |
548 | #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ | 549 | #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ |
549 | EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) | 550 | EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT) |
550 | /* Eventual metadata allocation (due to growing extent tree) | 551 | /* Eventual metadata allocation (due to growing extent tree) |
551 | * should not fail, so try to use reserved blocks for that.*/ | 552 | * should not fail, so try to use reserved blocks for that.*/ |
552 | #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 | 553 | #define EXT4_GET_BLOCKS_METADATA_NOFAIL 0x0020 |
553 | /* Don't normalize allocation size (used for fallocate) */ | 554 | /* Don't normalize allocation size (used for fallocate) */ |
554 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 | 555 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 |
555 | /* Request will not result in inode size update (user for fallocate) */ | 556 | /* Request will not result in inode size update (user for fallocate) */ |
556 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 | 557 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 |
557 | /* Do not take i_data_sem locking in ext4_map_blocks */ | 558 | /* Do not take i_data_sem locking in ext4_map_blocks */ |
558 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | 559 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 |
559 | /* Convert written extents to unwritten */ | 560 | /* Convert written extents to unwritten */ |
560 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 | 561 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200 |
561 | 562 | ||
562 | /* | 563 | /* |
563 | * The bit position of these flags must not overlap with any of the | 564 | * The bit position of these flags must not overlap with any of the |
564 | * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), | 565 | * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), |
565 | * read_extent_tree_block(), ext4_split_extent_at(), | 566 | * read_extent_tree_block(), ext4_split_extent_at(), |
566 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). | 567 | * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). |
567 | * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be | 568 | * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be |
568 | * caching the extents when reading from the extent tree while a | 569 | * caching the extents when reading from the extent tree while a |
569 | * truncate or punch hole operation is in progress. | 570 | * truncate or punch hole operation is in progress. |
570 | */ | 571 | */ |
571 | #define EXT4_EX_NOCACHE 0x40000000 | 572 | #define EXT4_EX_NOCACHE 0x40000000 |
572 | #define EXT4_EX_FORCE_CACHE 0x20000000 | 573 | #define EXT4_EX_FORCE_CACHE 0x20000000 |
573 | 574 | ||
574 | /* | 575 | /* |
575 | * Flags used by ext4_free_blocks | 576 | * Flags used by ext4_free_blocks |
576 | */ | 577 | */ |
577 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 | 578 | #define EXT4_FREE_BLOCKS_METADATA 0x0001 |
578 | #define EXT4_FREE_BLOCKS_FORGET 0x0002 | 579 | #define EXT4_FREE_BLOCKS_FORGET 0x0002 |
579 | #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 | 580 | #define EXT4_FREE_BLOCKS_VALIDATED 0x0004 |
580 | #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 | 581 | #define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 |
581 | #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 | 582 | #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 |
582 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 | 583 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 |
583 | 584 | ||
584 | /* | 585 | /* |
585 | * ioctl commands | 586 | * ioctl commands |
586 | */ | 587 | */ |
587 | #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS | 588 | #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS |
588 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS | 589 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS |
589 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) | 590 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) |
590 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) | 591 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) |
591 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION | 592 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION |
592 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION | 593 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION |
593 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) | 594 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) |
594 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) | 595 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) |
595 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) | 596 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) |
596 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) | 597 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) |
597 | #define EXT4_IOC_MIGRATE _IO('f', 9) | 598 | #define EXT4_IOC_MIGRATE _IO('f', 9) |
598 | /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ | 599 | /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ |
599 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | 600 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ |
600 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) | 601 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) |
601 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | 602 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) |
602 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) | 603 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) |
603 | #define EXT4_IOC_SWAP_BOOT _IO('f', 17) | 604 | #define EXT4_IOC_SWAP_BOOT _IO('f', 17) |
604 | #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) | 605 | #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) |
605 | 606 | ||
606 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 607 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
607 | /* | 608 | /* |
608 | * ioctl commands in 32 bit emulation | 609 | * ioctl commands in 32 bit emulation |
609 | */ | 610 | */ |
610 | #define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS | 611 | #define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS |
611 | #define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS | 612 | #define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS |
612 | #define EXT4_IOC32_GETVERSION _IOR('f', 3, int) | 613 | #define EXT4_IOC32_GETVERSION _IOR('f', 3, int) |
613 | #define EXT4_IOC32_SETVERSION _IOW('f', 4, int) | 614 | #define EXT4_IOC32_SETVERSION _IOW('f', 4, int) |
614 | #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) | 615 | #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) |
615 | #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) | 616 | #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) |
616 | #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) | 617 | #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) |
617 | #define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) | 618 | #define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) |
618 | #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION | 619 | #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION |
619 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION | 620 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION |
620 | #endif | 621 | #endif |
621 | 622 | ||
622 | /* Max physical block we can address w/o extents */ | 623 | /* Max physical block we can address w/o extents */ |
623 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF | 624 | #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF |
624 | 625 | ||
625 | /* | 626 | /* |
626 | * Structure of an inode on the disk | 627 | * Structure of an inode on the disk |
627 | */ | 628 | */ |
628 | struct ext4_inode { | 629 | struct ext4_inode { |
629 | __le16 i_mode; /* File mode */ | 630 | __le16 i_mode; /* File mode */ |
630 | __le16 i_uid; /* Low 16 bits of Owner Uid */ | 631 | __le16 i_uid; /* Low 16 bits of Owner Uid */ |
631 | __le32 i_size_lo; /* Size in bytes */ | 632 | __le32 i_size_lo; /* Size in bytes */ |
632 | __le32 i_atime; /* Access time */ | 633 | __le32 i_atime; /* Access time */ |
633 | __le32 i_ctime; /* Inode Change time */ | 634 | __le32 i_ctime; /* Inode Change time */ |
634 | __le32 i_mtime; /* Modification time */ | 635 | __le32 i_mtime; /* Modification time */ |
635 | __le32 i_dtime; /* Deletion Time */ | 636 | __le32 i_dtime; /* Deletion Time */ |
636 | __le16 i_gid; /* Low 16 bits of Group Id */ | 637 | __le16 i_gid; /* Low 16 bits of Group Id */ |
637 | __le16 i_links_count; /* Links count */ | 638 | __le16 i_links_count; /* Links count */ |
638 | __le32 i_blocks_lo; /* Blocks count */ | 639 | __le32 i_blocks_lo; /* Blocks count */ |
639 | __le32 i_flags; /* File flags */ | 640 | __le32 i_flags; /* File flags */ |
640 | union { | 641 | union { |
641 | struct { | 642 | struct { |
642 | __le32 l_i_version; | 643 | __le32 l_i_version; |
643 | } linux1; | 644 | } linux1; |
644 | struct { | 645 | struct { |
645 | __u32 h_i_translator; | 646 | __u32 h_i_translator; |
646 | } hurd1; | 647 | } hurd1; |
647 | struct { | 648 | struct { |
648 | __u32 m_i_reserved1; | 649 | __u32 m_i_reserved1; |
649 | } masix1; | 650 | } masix1; |
650 | } osd1; /* OS dependent 1 */ | 651 | } osd1; /* OS dependent 1 */ |
651 | __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ | 652 | __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ |
652 | __le32 i_generation; /* File version (for NFS) */ | 653 | __le32 i_generation; /* File version (for NFS) */ |
653 | __le32 i_file_acl_lo; /* File ACL */ | 654 | __le32 i_file_acl_lo; /* File ACL */ |
654 | __le32 i_size_high; | 655 | __le32 i_size_high; |
655 | __le32 i_obso_faddr; /* Obsoleted fragment address */ | 656 | __le32 i_obso_faddr; /* Obsoleted fragment address */ |
656 | union { | 657 | union { |
657 | struct { | 658 | struct { |
658 | __le16 l_i_blocks_high; /* were l_i_reserved1 */ | 659 | __le16 l_i_blocks_high; /* were l_i_reserved1 */ |
659 | __le16 l_i_file_acl_high; | 660 | __le16 l_i_file_acl_high; |
660 | __le16 l_i_uid_high; /* these 2 fields */ | 661 | __le16 l_i_uid_high; /* these 2 fields */ |
661 | __le16 l_i_gid_high; /* were reserved2[0] */ | 662 | __le16 l_i_gid_high; /* were reserved2[0] */ |
662 | __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ | 663 | __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ |
663 | __le16 l_i_reserved; | 664 | __le16 l_i_reserved; |
664 | } linux2; | 665 | } linux2; |
665 | struct { | 666 | struct { |
666 | __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ | 667 | __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ |
667 | __u16 h_i_mode_high; | 668 | __u16 h_i_mode_high; |
668 | __u16 h_i_uid_high; | 669 | __u16 h_i_uid_high; |
669 | __u16 h_i_gid_high; | 670 | __u16 h_i_gid_high; |
670 | __u32 h_i_author; | 671 | __u32 h_i_author; |
671 | } hurd2; | 672 | } hurd2; |
672 | struct { | 673 | struct { |
673 | __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ | 674 | __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ |
674 | __le16 m_i_file_acl_high; | 675 | __le16 m_i_file_acl_high; |
675 | __u32 m_i_reserved2[2]; | 676 | __u32 m_i_reserved2[2]; |
676 | } masix2; | 677 | } masix2; |
677 | } osd2; /* OS dependent 2 */ | 678 | } osd2; /* OS dependent 2 */ |
678 | __le16 i_extra_isize; | 679 | __le16 i_extra_isize; |
679 | __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ | 680 | __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ |
680 | __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ | 681 | __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ |
681 | __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ | 682 | __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ |
682 | __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ | 683 | __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ |
683 | __le32 i_crtime; /* File Creation time */ | 684 | __le32 i_crtime; /* File Creation time */ |
684 | __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ | 685 | __le32 i_crtime_extra; /* extra File Creation time (nsec << 2 | epoch) */ |
685 | __le32 i_version_hi; /* high 32 bits for 64-bit version */ | 686 | __le32 i_version_hi; /* high 32 bits for 64-bit version */ |
686 | }; | 687 | }; |
687 | 688 | ||
688 | struct move_extent { | 689 | struct move_extent { |
689 | __u32 reserved; /* should be zero */ | 690 | __u32 reserved; /* should be zero */ |
690 | __u32 donor_fd; /* donor file descriptor */ | 691 | __u32 donor_fd; /* donor file descriptor */ |
691 | __u64 orig_start; /* logical start offset in block for orig */ | 692 | __u64 orig_start; /* logical start offset in block for orig */ |
692 | __u64 donor_start; /* logical start offset in block for donor */ | 693 | __u64 donor_start; /* logical start offset in block for donor */ |
693 | __u64 len; /* block length to be moved */ | 694 | __u64 len; /* block length to be moved */ |
694 | __u64 moved_len; /* moved block length */ | 695 | __u64 moved_len; /* moved block length */ |
695 | }; | 696 | }; |
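struct move_extent is the argument block for ext4's online-defrag ioctl, EXT4_IOC_MOVE_EXT, declared with the other ioctls earlier in this header (e4defrag is the usual caller). A minimal userspace sketch of a call -- the helper name, descriptors, and ranges are all hypothetical, and error handling is trimmed:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>
    /* EXT4_IOC_MOVE_EXT and struct move_extent as declared in this header */

    int defrag_range(int orig_fd, int donor_fd, __u64 start, __u64 len)
    {
            struct move_extent me = {
                    .reserved    = 0,        /* must be zero */
                    .donor_fd    = donor_fd, /* donor supplies the replacement
                                              * extents; open it for writing */
                    .orig_start  = start,    /* logical block offset in orig */
                    .donor_start = start,    /* logical block offset in donor */
                    .len         = len,      /* number of blocks to exchange */
            };

            if (ioctl(orig_fd, EXT4_IOC_MOVE_EXT, &me) < 0)
                    return -1;               /* errno has the reason */

            printf("moved %llu of %llu blocks\n",
                   (unsigned long long)me.moved_len,
                   (unsigned long long)me.len);
            return 0;
    }

The kernel reports partial progress through moved_len, which is why that field exists even though the caller fills in everything else.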
696 | 697 | ||
697 | #define EXT4_EPOCH_BITS 2 | 698 | #define EXT4_EPOCH_BITS 2 |
698 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) | 699 | #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) |
699 | #define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) | 700 | #define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) |
700 | 701 | ||
701 | /* | 702 | /* |
702 | * Extended fields will fit into an inode if the filesystem was formatted | 703 | * Extended fields will fit into an inode if the filesystem was formatted |
703 | * with large inodes (-I 256 or larger) and there are not currently any EAs | 704 | * with large inodes (-I 256 or larger) and there are not currently any EAs |
704 | * consuming all of the available space. For new inodes we always reserve | 705 | * consuming all of the available space. For new inodes we always reserve |
705 | * enough space for the kernel's known extended fields, but for inodes | 706 | * enough space for the kernel's known extended fields, but for inodes |
706 | * created with an old kernel this might not have been the case. None of | 707 | * created with an old kernel this might not have been the case. None of |
707 | * the extended inode fields is critical for correct filesystem operation. | 708 | * the extended inode fields is critical for correct filesystem operation. |
708 | * This macro checks if a certain field fits in the inode. Note that | 709 | * This macro checks if a certain field fits in the inode. Note that |
709 | * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize | 710 | * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize |
710 | */ | 711 | */ |
711 | #define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ | 712 | #define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ |
712 | ((offsetof(typeof(*ext4_inode), field) + \ | 713 | ((offsetof(typeof(*ext4_inode), field) + \ |
713 | sizeof((ext4_inode)->field)) \ | 714 | sizeof((ext4_inode)->field)) \ |
714 | <= (EXT4_GOOD_OLD_INODE_SIZE + \ | 715 | <= (EXT4_GOOD_OLD_INODE_SIZE + \ |
715 | (einode)->i_extra_isize)) \ | 716 | (einode)->i_extra_isize)) \ |
716 | 717 | ||
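A worked example against the struct ext4_inode layout above (offsets computed by hand, so treat them as illustrative): i_crtime sits at byte offset 144, so EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime) reduces to 144 + 4 <= 128 + i_extra_isize, i.e. a creation time is only stored when i_extra_isize >= 20. Original 128-byte inodes therefore never carry one, while mkfs's 256-byte default with a typical i_extra_isize of 28 or 32 fits it comfortably.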
717 | static inline __le32 ext4_encode_extra_time(struct timespec *time) | 718 | static inline __le32 ext4_encode_extra_time(struct timespec *time) |
718 | { | 719 | { |
719 | return cpu_to_le32((sizeof(time->tv_sec) > 4 ? | 720 | return cpu_to_le32((sizeof(time->tv_sec) > 4 ? |
720 | (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | | 721 | (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | |
721 | ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); | 722 | ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); |
722 | } | 723 | } |
723 | 724 | ||
724 | static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) | 725 | static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) |
725 | { | 726 | { |
726 | if (sizeof(time->tv_sec) > 4) | 727 | if (sizeof(time->tv_sec) > 4) |
727 | time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) | 728 | time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) |
728 | << 32; | 729 | << 32; |
729 | time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; | 730 | time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; |
730 | } | 731 | } |
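A worked round trip of this encoding, assuming a 64-bit tv_sec (the values are chosen purely for illustration):

    /* Illustrative only: a post-2038 timestamp. */
    struct timespec ts = { .tv_sec = 0x123456789LL, .tv_nsec = 999999999 };
    __le32 extra = ext4_encode_extra_time(&ts);
    /* extra now packs:
     *   bits 0..1  = (0x123456789 >> 32) & EXT4_EPOCH_MASK = 0x1
     *   bits 2..31 = 999999999 (tv_nsec, shifted left by EXT4_EPOCH_BITS)
     */

    /* Decoding assumes tv_sec already holds the low 32 bits read from the
     * on-disk xtime field (see EXT4_INODE_GET_XTIME below); it ORs the
     * epoch bits back in and unpacks the nanoseconds: */
    struct timespec out = { .tv_sec = 0x23456789, .tv_nsec = 0 };
    ext4_decode_extra_time(&out, extra);
    /* out.tv_sec == 0x123456789, out.tv_nsec == 999999999 */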
731 | 732 | ||
732 | #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ | 733 | #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ |
733 | do { \ | 734 | do { \ |
734 | (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ | 735 | (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ |
735 | if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ | 736 | if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ |
736 | (raw_inode)->xtime ## _extra = \ | 737 | (raw_inode)->xtime ## _extra = \ |
737 | ext4_encode_extra_time(&(inode)->xtime); \ | 738 | ext4_encode_extra_time(&(inode)->xtime); \ |
738 | } while (0) | 739 | } while (0) |
739 | 740 | ||
740 | #define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ | 741 | #define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ |
741 | do { \ | 742 | do { \ |
742 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ | 743 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ |
743 | (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ | 744 | (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ |
744 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ | 745 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ |
745 | (raw_inode)->xtime ## _extra = \ | 746 | (raw_inode)->xtime ## _extra = \ |
746 | ext4_encode_extra_time(&(einode)->xtime); \ | 747 | ext4_encode_extra_time(&(einode)->xtime); \ |
747 | } while (0) | 748 | } while (0) |
748 | 749 | ||
749 | #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ | 750 | #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ |
750 | do { \ | 751 | do { \ |
751 | (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ | 752 | (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ |
752 | if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ | 753 | if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ |
753 | ext4_decode_extra_time(&(inode)->xtime, \ | 754 | ext4_decode_extra_time(&(inode)->xtime, \ |
754 | raw_inode->xtime ## _extra); \ | 755 | raw_inode->xtime ## _extra); \ |
755 | else \ | 756 | else \ |
756 | (inode)->xtime.tv_nsec = 0; \ | 757 | (inode)->xtime.tv_nsec = 0; \ |
757 | } while (0) | 758 | } while (0) |
758 | 759 | ||
759 | #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ | 760 | #define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ |
760 | do { \ | 761 | do { \ |
761 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ | 762 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ |
762 | (einode)->xtime.tv_sec = \ | 763 | (einode)->xtime.tv_sec = \ |
763 | (signed)le32_to_cpu((raw_inode)->xtime); \ | 764 | (signed)le32_to_cpu((raw_inode)->xtime); \ |
764 | else \ | 765 | else \ |
765 | (einode)->xtime.tv_sec = 0; \ | 766 | (einode)->xtime.tv_sec = 0; \ |
766 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ | 767 | if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ |
767 | ext4_decode_extra_time(&(einode)->xtime, \ | 768 | ext4_decode_extra_time(&(einode)->xtime, \ |
768 | raw_inode->xtime ## _extra); \ | 769 | raw_inode->xtime ## _extra); \ |
769 | else \ | 770 | else \ |
770 | (einode)->xtime.tv_nsec = 0; \ | 771 | (einode)->xtime.tv_nsec = 0; \ |
771 | } while (0) | 772 | } while (0) |
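For reference, the token pasting means EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode) expands (roughly, ignoring the do/while wrapper) to:

    (inode)->i_ctime.tv_sec = (signed)le32_to_cpu((raw_inode)->i_ctime);
    if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), i_ctime_extra))
            ext4_decode_extra_time(&(inode)->i_ctime,
                                   raw_inode->i_ctime_extra);
    else
            (inode)->i_ctime.tv_nsec = 0;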
772 | 773 | ||
773 | #define i_disk_version osd1.linux1.l_i_version | 774 | #define i_disk_version osd1.linux1.l_i_version |
774 | 775 | ||
775 | #if defined(__KERNEL__) || defined(__linux__) | 776 | #if defined(__KERNEL__) || defined(__linux__) |
776 | #define i_reserved1 osd1.linux1.l_i_reserved1 | 777 | #define i_reserved1 osd1.linux1.l_i_reserved1 |
777 | #define i_file_acl_high osd2.linux2.l_i_file_acl_high | 778 | #define i_file_acl_high osd2.linux2.l_i_file_acl_high |
778 | #define i_blocks_high osd2.linux2.l_i_blocks_high | 779 | #define i_blocks_high osd2.linux2.l_i_blocks_high |
779 | #define i_uid_low i_uid | 780 | #define i_uid_low i_uid |
780 | #define i_gid_low i_gid | 781 | #define i_gid_low i_gid |
781 | #define i_uid_high osd2.linux2.l_i_uid_high | 782 | #define i_uid_high osd2.linux2.l_i_uid_high |
782 | #define i_gid_high osd2.linux2.l_i_gid_high | 783 | #define i_gid_high osd2.linux2.l_i_gid_high |
783 | #define i_checksum_lo osd2.linux2.l_i_checksum_lo | 784 | #define i_checksum_lo osd2.linux2.l_i_checksum_lo |
784 | 785 | ||
785 | #elif defined(__GNU__) | 786 | #elif defined(__GNU__) |
786 | 787 | ||
787 | #define i_translator osd1.hurd1.h_i_translator | 788 | #define i_translator osd1.hurd1.h_i_translator |
788 | #define i_uid_high osd2.hurd2.h_i_uid_high | 789 | #define i_uid_high osd2.hurd2.h_i_uid_high |
789 | #define i_gid_high osd2.hurd2.h_i_gid_high | 790 | #define i_gid_high osd2.hurd2.h_i_gid_high |
790 | #define i_author osd2.hurd2.h_i_author | 791 | #define i_author osd2.hurd2.h_i_author |
791 | 792 | ||
792 | #elif defined(__masix__) | 793 | #elif defined(__masix__) |
793 | 794 | ||
794 | #define i_reserved1 osd1.masix1.m_i_reserved1 | 795 | #define i_reserved1 osd1.masix1.m_i_reserved1 |
795 | #define i_file_acl_high osd2.masix2.m_i_file_acl_high | 796 | #define i_file_acl_high osd2.masix2.m_i_file_acl_high |
796 | #define i_reserved2 osd2.masix2.m_i_reserved2 | 797 | #define i_reserved2 osd2.masix2.m_i_reserved2 |
797 | 798 | ||
798 | #endif /* defined(__KERNEL__) || defined(__linux__) */ | 799 | #endif /* defined(__KERNEL__) || defined(__linux__) */ |
799 | 800 | ||
800 | #include "extents_status.h" | 801 | #include "extents_status.h" |
801 | 802 | ||
802 | /* | 803 | /* |
803 | * fourth extended file system inode data in memory | 804 | * fourth extended file system inode data in memory |
804 | */ | 805 | */ |
805 | struct ext4_inode_info { | 806 | struct ext4_inode_info { |
806 | __le32 i_data[15]; /* unconverted */ | 807 | __le32 i_data[15]; /* unconverted */ |
807 | __u32 i_dtime; | 808 | __u32 i_dtime; |
808 | ext4_fsblk_t i_file_acl; | 809 | ext4_fsblk_t i_file_acl; |
809 | 810 | ||
810 | /* | 811 | /* |
811 | * i_block_group is the number of the block group which contains | 812 | * i_block_group is the number of the block group which contains |
812 | * this file's inode. Constant across the lifetime of the inode, | 813 | * this file's inode. Constant across the lifetime of the inode, |
813 | * it is used for making block allocation decisions - we try to | 814 | * it is used for making block allocation decisions - we try to |
814 | * place a file's data blocks near its inode block, and new inodes | 815 | * place a file's data blocks near its inode block, and new inodes |
815 | * near to their parent directory's inode. | 816 | * near to their parent directory's inode. |
816 | */ | 817 | */ |
817 | ext4_group_t i_block_group; | 818 | ext4_group_t i_block_group; |
818 | ext4_lblk_t i_dir_start_lookup; | 819 | ext4_lblk_t i_dir_start_lookup; |
819 | #if (BITS_PER_LONG < 64) | 820 | #if (BITS_PER_LONG < 64) |
820 | unsigned long i_state_flags; /* Dynamic state flags */ | 821 | unsigned long i_state_flags; /* Dynamic state flags */ |
821 | #endif | 822 | #endif |
822 | unsigned long i_flags; | 823 | unsigned long i_flags; |
823 | 824 | ||
824 | /* | 825 | /* |
825 | * Extended attributes can be read independently of the main file | 826 | * Extended attributes can be read independently of the main file |
826 | * data. Taking i_mutex even when reading would cause contention | 827 | * data. Taking i_mutex even when reading would cause contention |
827 | * between readers of EAs and writers of regular file data, so | 828 | * between readers of EAs and writers of regular file data, so |
828 | * instead we synchronize on xattr_sem when reading or changing | 829 | * instead we synchronize on xattr_sem when reading or changing |
829 | * EAs. | 830 | * EAs. |
830 | */ | 831 | */ |
831 | struct rw_semaphore xattr_sem; | 832 | struct rw_semaphore xattr_sem; |
832 | 833 | ||
833 | struct list_head i_orphan; /* unlinked but open inodes */ | 834 | struct list_head i_orphan; /* unlinked but open inodes */ |
834 | 835 | ||
835 | /* | 836 | /* |
836 | * i_disksize keeps track of what the inode size is ON DISK, not | 837 | * i_disksize keeps track of what the inode size is ON DISK, not |
837 | * in memory. During truncate, i_size is set to the new size by | 838 | * in memory. During truncate, i_size is set to the new size by |
838 | * the VFS prior to calling ext4_truncate(), but the filesystem won't | 839 | * the VFS prior to calling ext4_truncate(), but the filesystem won't |
839 | * set i_disksize to 0 until the truncate is actually under way. | 840 | * set i_disksize to 0 until the truncate is actually under way. |
840 | * | 841 | * |
841 | * The intent is that i_disksize always represents the blocks which | 842 | * The intent is that i_disksize always represents the blocks which |
842 | * are used by this file. This allows recovery to restart truncate | 843 | * are used by this file. This allows recovery to restart truncate |
843 | * on orphans if we crash during truncate. We actually write i_disksize | 844 | * on orphans if we crash during truncate. We actually write i_disksize |
844 | * into the on-disk inode when writing inodes out, instead of i_size. | 845 | * into the on-disk inode when writing inodes out, instead of i_size. |
845 | * | 846 | * |
846 | * The only time when i_disksize and i_size may be different is when | 847 | * The only time when i_disksize and i_size may be different is when |
847 | * a truncate is in progress. The only things which change i_disksize | 848 | * a truncate is in progress. The only things which change i_disksize |
848 | * are ext4_get_block (growth) and ext4_truncate (shrinkth). | 849 | * are ext4_get_block (growth) and ext4_truncate (shrinkth). |
849 | */ | 850 | */ |
850 | loff_t i_disksize; | 851 | loff_t i_disksize; |
851 | 852 | ||
852 | /* | 853 | /* |
853 | * i_data_sem is for serialising ext4_truncate() against | 854 | * i_data_sem is for serialising ext4_truncate() against |
854 | * ext4_get_block(). In the 2.4 ext2 design, great chunks of inode's | 855 | * ext4_get_block(). In the 2.4 ext2 design, great chunks of inode's |
855 | * data tree are chopped off during truncate. We can't do that in | 856 | * data tree are chopped off during truncate. We can't do that in |
856 | * ext4 because whenever we perform intermediate commits during | 857 | * ext4 because whenever we perform intermediate commits during |
857 | * truncate, the inode and all the metadata blocks *must* be in a | 858 | * truncate, the inode and all the metadata blocks *must* be in a |
858 | * consistent state which allows truncation of the orphans to restart | 859 | * consistent state which allows truncation of the orphans to restart |
859 | * during recovery. Hence we must fix the get_block-vs-truncate race | 860 | * during recovery. Hence we must fix the get_block-vs-truncate race |
860 | * by other means, so we have i_data_sem. | 861 | * by other means, so we have i_data_sem. |
861 | */ | 862 | */ |
862 | struct rw_semaphore i_data_sem; | 863 | struct rw_semaphore i_data_sem; |
863 | struct inode vfs_inode; | 864 | struct inode vfs_inode; |
864 | struct jbd2_inode *jinode; | 865 | struct jbd2_inode *jinode; |
865 | 866 | ||
866 | spinlock_t i_raw_lock; /* protects updates to the raw inode */ | 867 | spinlock_t i_raw_lock; /* protects updates to the raw inode */ |
867 | 868 | ||
868 | /* | 869 | /* |
869 | * File creation time. Its function is the same as that of | 870 | * File creation time. Its function is the same as that of |
870 | * struct timespec i_{a,c,m}time in the generic inode. | 871 | * struct timespec i_{a,c,m}time in the generic inode. |
871 | */ | 872 | */ |
872 | struct timespec i_crtime; | 873 | struct timespec i_crtime; |
873 | 874 | ||
874 | /* mballoc */ | 875 | /* mballoc */ |
875 | struct list_head i_prealloc_list; | 876 | struct list_head i_prealloc_list; |
876 | spinlock_t i_prealloc_lock; | 877 | spinlock_t i_prealloc_lock; |
877 | 878 | ||
878 | /* extents status tree */ | 879 | /* extents status tree */ |
879 | struct ext4_es_tree i_es_tree; | 880 | struct ext4_es_tree i_es_tree; |
880 | rwlock_t i_es_lock; | 881 | rwlock_t i_es_lock; |
881 | struct list_head i_es_list; | 882 | struct list_head i_es_list; |
882 | unsigned int i_es_all_nr; /* protected by i_es_lock */ | 883 | unsigned int i_es_all_nr; /* protected by i_es_lock */ |
883 | unsigned int i_es_shk_nr; /* protected by i_es_lock */ | 884 | unsigned int i_es_shk_nr; /* protected by i_es_lock */ |
884 | ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for | 885 | ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for |
885 | extents to shrink. Protected by | 886 | extents to shrink. Protected by |
886 | i_es_lock */ | 887 | i_es_lock */ |
887 | 888 | ||
888 | /* ialloc */ | 889 | /* ialloc */ |
889 | ext4_group_t i_last_alloc_group; | 890 | ext4_group_t i_last_alloc_group; |
890 | 891 | ||
891 | /* allocation reservation info for delalloc */ | 892 | /* allocation reservation info for delalloc */ |
892 | /* In case of bigalloc, these refer to clusters rather than blocks */ | 893 | /* In case of bigalloc, these refer to clusters rather than blocks */ |
893 | unsigned int i_reserved_data_blocks; | 894 | unsigned int i_reserved_data_blocks; |
894 | unsigned int i_reserved_meta_blocks; | 895 | unsigned int i_reserved_meta_blocks; |
895 | unsigned int i_allocated_meta_blocks; | 896 | unsigned int i_allocated_meta_blocks; |
896 | ext4_lblk_t i_da_metadata_calc_last_lblock; | 897 | ext4_lblk_t i_da_metadata_calc_last_lblock; |
897 | int i_da_metadata_calc_len; | 898 | int i_da_metadata_calc_len; |
898 | 899 | ||
899 | /* on-disk additional length */ | 900 | /* on-disk additional length */ |
900 | __u16 i_extra_isize; | 901 | __u16 i_extra_isize; |
901 | 902 | ||
902 | /* Indicate the inline data space. */ | 903 | /* Indicate the inline data space. */ |
903 | u16 i_inline_off; | 904 | u16 i_inline_off; |
904 | u16 i_inline_size; | 905 | u16 i_inline_size; |
905 | 906 | ||
906 | #ifdef CONFIG_QUOTA | 907 | #ifdef CONFIG_QUOTA |
907 | /* quota space reservation, managed internally by quota code */ | 908 | /* quota space reservation, managed internally by quota code */ |
908 | qsize_t i_reserved_quota; | 909 | qsize_t i_reserved_quota; |
909 | #endif | 910 | #endif |
910 | 911 | ||
911 | /* Lock protecting lists below */ | 912 | /* Lock protecting lists below */ |
912 | spinlock_t i_completed_io_lock; | 913 | spinlock_t i_completed_io_lock; |
913 | /* | 914 | /* |
914 | * Completed IOs that need unwritten extents handling and have | 915 | * Completed IOs that need unwritten extents handling and have |
915 | * transaction reserved | 916 | * transaction reserved |
916 | */ | 917 | */ |
917 | struct list_head i_rsv_conversion_list; | 918 | struct list_head i_rsv_conversion_list; |
918 | /* | 919 | /* |
919 | * Completed IOs that need unwritten extents handling and don't have | 920 | * Completed IOs that need unwritten extents handling and don't have |
920 | * transaction reserved | 921 | * transaction reserved |
921 | */ | 922 | */ |
922 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 923 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
923 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ | 924 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ |
924 | struct work_struct i_rsv_conversion_work; | 925 | struct work_struct i_rsv_conversion_work; |
925 | 926 | ||
926 | spinlock_t i_block_reservation_lock; | 927 | spinlock_t i_block_reservation_lock; |
927 | 928 | ||
928 | /* | 929 | /* |
929 | * Transactions that contain inode's metadata needed to complete | 930 | * Transactions that contain inode's metadata needed to complete |
930 | * fsync and fdatasync, respectively. | 931 | * fsync and fdatasync, respectively. |
931 | */ | 932 | */ |
932 | tid_t i_sync_tid; | 933 | tid_t i_sync_tid; |
933 | tid_t i_datasync_tid; | 934 | tid_t i_datasync_tid; |
934 | 935 | ||
935 | #ifdef CONFIG_QUOTA | 936 | #ifdef CONFIG_QUOTA |
936 | struct dquot *i_dquot[MAXQUOTAS]; | 937 | struct dquot *i_dquot[MAXQUOTAS]; |
937 | #endif | 938 | #endif |
938 | 939 | ||
939 | /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ | 940 | /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ |
940 | __u32 i_csum_seed; | 941 | __u32 i_csum_seed; |
941 | }; | 942 | }; |
942 | 943 | ||
943 | /* | 944 | /* |
944 | * File system states | 945 | * File system states |
945 | */ | 946 | */ |
946 | #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ | 947 | #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ |
947 | #define EXT4_ERROR_FS 0x0002 /* Errors detected */ | 948 | #define EXT4_ERROR_FS 0x0002 /* Errors detected */ |
948 | #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ | 949 | #define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ |
949 | 950 | ||
950 | /* | 951 | /* |
951 | * Misc. filesystem flags | 952 | * Misc. filesystem flags |
952 | */ | 953 | */ |
953 | #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ | 954 | #define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ |
954 | #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ | 955 | #define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ |
955 | #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ | 956 | #define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ |
956 | 957 | ||
957 | /* | 958 | /* |
958 | * Mount flags set via mount options or defaults | 959 | * Mount flags set via mount options or defaults |
959 | */ | 960 | */ |
960 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ | 961 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ |
961 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ | 962 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ |
962 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ | 963 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ |
963 | #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ | 964 | #define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ |
964 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ | 965 | #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ |
965 | #define EXT4_MOUNT_ERRORS_MASK 0x00070 | 966 | #define EXT4_MOUNT_ERRORS_MASK 0x00070 |
966 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ | 967 | #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ |
967 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ | 968 | #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ |
968 | #ifdef CONFIG_FS_DAX | 969 | #ifdef CONFIG_FS_DAX |
969 | #define EXT4_MOUNT_DAX 0x00200 /* Direct Access */ | 970 | #define EXT4_MOUNT_DAX 0x00200 /* Direct Access */ |
970 | #else | 971 | #else |
971 | #define EXT4_MOUNT_DAX 0 | 972 | #define EXT4_MOUNT_DAX 0 |
972 | #endif | 973 | #endif |
973 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ | 974 | #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ |
974 | #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ | 975 | #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ |
975 | #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ | 976 | #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ |
976 | #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ | 977 | #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ |
977 | #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ | 978 | #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ |
978 | #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ | 979 | #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ |
979 | #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ | 980 | #define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ |
980 | #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ | 981 | #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ |
981 | #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ | 982 | #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ |
982 | #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ | 983 | #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ |
983 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 984 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
984 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 985 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
985 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 986 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
986 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ | 987 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ |
987 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 988 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
988 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 989 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
989 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 990 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
990 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 991 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
991 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 992 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
992 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ | 993 | #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ |
993 | #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ | 994 | #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ |
994 | 995 | ||
995 | /* | 996 | /* |
996 | * Mount flags set automatically (they cannot be set via mount options), | 997 | * Mount flags set automatically (they cannot be set via mount options), |
997 | * based on a per-filesystem feature or property, or in special cases such | 998 | * based on a per-filesystem feature or property, or in special cases such |
998 | * as distinguishing an explicit mount option from a default. | 999 | * as distinguishing an explicit mount option from a default. |
999 | */ | 1000 | */ |
1000 | #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly | 1001 | #define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly |
1001 | specified delalloc */ | 1002 | specified delalloc */ |
1002 | #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group | 1003 | #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group |
1003 | size of blocksize * 8 | 1004 | size of blocksize * 8 |
1004 | blocks */ | 1005 | blocks */ |
1005 | #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated | 1006 | #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated |
1006 | file systems */ | 1007 | file systems */ |
1007 | 1008 | ||
1008 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ | 1009 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ |
1009 | ~EXT4_MOUNT_##opt | 1010 | ~EXT4_MOUNT_##opt |
1010 | #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ | 1011 | #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ |
1011 | EXT4_MOUNT_##opt | 1012 | EXT4_MOUNT_##opt |
1012 | #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ | 1013 | #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ |
1013 | EXT4_MOUNT_##opt) | 1014 | EXT4_MOUNT_##opt) |
1014 | 1015 | ||
1015 | #define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \ | 1016 | #define clear_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 &= \ |
1016 | ~EXT4_MOUNT2_##opt | 1017 | ~EXT4_MOUNT2_##opt |
1017 | #define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \ | 1018 | #define set_opt2(sb, opt) EXT4_SB(sb)->s_mount_opt2 |= \ |
1018 | EXT4_MOUNT2_##opt | 1019 | EXT4_MOUNT2_##opt |
1019 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ | 1020 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ |
1020 | EXT4_MOUNT2_##opt) | 1021 | EXT4_MOUNT2_##opt) |
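Callers pass only the flag suffix; the macros paste on the EXT4_MOUNT_/EXT4_MOUNT2_ prefix and pick the matching word in the superblock info. A sketch of the common pattern (not a specific call site):

    if (test_opt(sb, DELALLOC) && !test_opt2(sb, EXPLICIT_DELALLOC)) {
            /* delalloc is enabled, but only by default -- the user never
             * asked for it.  The two tests expand to:
             *   EXT4_SB(sb)->s_mount_opt  & EXT4_MOUNT_DELALLOC
             *   EXT4_SB(sb)->s_mount_opt2 & EXT4_MOUNT2_EXPLICIT_DELALLOC
             */
    }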
1021 | 1022 | ||
1022 | #define ext4_test_and_set_bit __test_and_set_bit_le | 1023 | #define ext4_test_and_set_bit __test_and_set_bit_le |
1023 | #define ext4_set_bit __set_bit_le | 1024 | #define ext4_set_bit __set_bit_le |
1024 | #define ext4_set_bit_atomic ext2_set_bit_atomic | 1025 | #define ext4_set_bit_atomic ext2_set_bit_atomic |
1025 | #define ext4_test_and_clear_bit __test_and_clear_bit_le | 1026 | #define ext4_test_and_clear_bit __test_and_clear_bit_le |
1026 | #define ext4_clear_bit __clear_bit_le | 1027 | #define ext4_clear_bit __clear_bit_le |
1027 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic | 1028 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic |
1028 | #define ext4_test_bit test_bit_le | 1029 | #define ext4_test_bit test_bit_le |
1029 | #define ext4_find_next_zero_bit find_next_zero_bit_le | 1030 | #define ext4_find_next_zero_bit find_next_zero_bit_le |
1030 | #define ext4_find_next_bit find_next_bit_le | 1031 | #define ext4_find_next_bit find_next_bit_le |
1031 | 1032 | ||
1032 | extern void ext4_set_bits(void *bm, int cur, int len); | 1033 | extern void ext4_set_bits(void *bm, int cur, int len); |
1033 | 1034 | ||
1034 | /* | 1035 | /* |
1035 | * Maximal mount counts between two filesystem checks | 1036 | * Maximal mount counts between two filesystem checks |
1036 | */ | 1037 | */ |
1037 | #define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ | 1038 | #define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ |
1038 | #define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ | 1039 | #define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ |
1039 | 1040 | ||
1040 | /* | 1041 | /* |
1041 | * Behaviour when detecting errors | 1042 | * Behaviour when detecting errors |
1042 | */ | 1043 | */ |
1043 | #define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ | 1044 | #define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ |
1044 | #define EXT4_ERRORS_RO 2 /* Remount fs read-only */ | 1045 | #define EXT4_ERRORS_RO 2 /* Remount fs read-only */ |
1045 | #define EXT4_ERRORS_PANIC 3 /* Panic */ | 1046 | #define EXT4_ERRORS_PANIC 3 /* Panic */ |
1046 | #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE | 1047 | #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE |
1047 | 1048 | ||
1048 | /* Metadata checksum algorithm codes */ | 1049 | /* Metadata checksum algorithm codes */ |
1049 | #define EXT4_CRC32C_CHKSUM 1 | 1050 | #define EXT4_CRC32C_CHKSUM 1 |
1050 | 1051 | ||
1052 | /* Encryption algorithms */ | ||
1053 | #define EXT4_ENCRYPTION_MODE_INVALID 0 | ||
1054 | #define EXT4_ENCRYPTION_MODE_AES_256_XTS 1 | ||
1055 | #define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 | ||
1056 | #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 | ||
1057 | |||
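Per the merge description these are reserved codepoints only; the encryption implementation lands later. A superblock consumer could still bounds-check the s_encrypt_algos[4] array added to struct ext4_super_block below -- a hypothetical helper, not code from this commit:

    /* Hypothetical: reject superblocks advertising an unknown mode. */
    static bool ext4_encrypt_algos_valid(const struct ext4_super_block *es)
    {
            int i;

            for (i = 0; i < 4; i++)
                    if (es->s_encrypt_algos[i] > EXT4_ENCRYPTION_MODE_AES_256_CBC)
                            return false;
            return true;
    }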
1051 | /* | 1058 | /* |
1052 | * Structure of the super block | 1059 | * Structure of the super block |
1053 | */ | 1060 | */ |
1054 | struct ext4_super_block { | 1061 | struct ext4_super_block { |
1055 | /*00*/ __le32 s_inodes_count; /* Inodes count */ | 1062 | /*00*/ __le32 s_inodes_count; /* Inodes count */ |
1056 | __le32 s_blocks_count_lo; /* Blocks count */ | 1063 | __le32 s_blocks_count_lo; /* Blocks count */ |
1057 | __le32 s_r_blocks_count_lo; /* Reserved blocks count */ | 1064 | __le32 s_r_blocks_count_lo; /* Reserved blocks count */ |
1058 | __le32 s_free_blocks_count_lo; /* Free blocks count */ | 1065 | __le32 s_free_blocks_count_lo; /* Free blocks count */ |
1059 | /*10*/ __le32 s_free_inodes_count; /* Free inodes count */ | 1066 | /*10*/ __le32 s_free_inodes_count; /* Free inodes count */ |
1060 | __le32 s_first_data_block; /* First Data Block */ | 1067 | __le32 s_first_data_block; /* First Data Block */ |
1061 | __le32 s_log_block_size; /* Block size */ | 1068 | __le32 s_log_block_size; /* Block size */ |
1062 | __le32 s_log_cluster_size; /* Allocation cluster size */ | 1069 | __le32 s_log_cluster_size; /* Allocation cluster size */ |
1063 | /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ | 1070 | /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ |
1064 | __le32 s_clusters_per_group; /* # Clusters per group */ | 1071 | __le32 s_clusters_per_group; /* # Clusters per group */ |
1065 | __le32 s_inodes_per_group; /* # Inodes per group */ | 1072 | __le32 s_inodes_per_group; /* # Inodes per group */ |
1066 | __le32 s_mtime; /* Mount time */ | 1073 | __le32 s_mtime; /* Mount time */ |
1067 | /*30*/ __le32 s_wtime; /* Write time */ | 1074 | /*30*/ __le32 s_wtime; /* Write time */ |
1068 | __le16 s_mnt_count; /* Mount count */ | 1075 | __le16 s_mnt_count; /* Mount count */ |
1069 | __le16 s_max_mnt_count; /* Maximal mount count */ | 1076 | __le16 s_max_mnt_count; /* Maximal mount count */ |
1070 | __le16 s_magic; /* Magic signature */ | 1077 | __le16 s_magic; /* Magic signature */ |
1071 | __le16 s_state; /* File system state */ | 1078 | __le16 s_state; /* File system state */ |
1072 | __le16 s_errors; /* Behaviour when detecting errors */ | 1079 | __le16 s_errors; /* Behaviour when detecting errors */ |
1073 | __le16 s_minor_rev_level; /* minor revision level */ | 1080 | __le16 s_minor_rev_level; /* minor revision level */ |
1074 | /*40*/ __le32 s_lastcheck; /* time of last check */ | 1081 | /*40*/ __le32 s_lastcheck; /* time of last check */ |
1075 | __le32 s_checkinterval; /* max. time between checks */ | 1082 | __le32 s_checkinterval; /* max. time between checks */ |
1076 | __le32 s_creator_os; /* OS */ | 1083 | __le32 s_creator_os; /* OS */ |
1077 | __le32 s_rev_level; /* Revision level */ | 1084 | __le32 s_rev_level; /* Revision level */ |
1078 | /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ | 1085 | /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ |
1079 | __le16 s_def_resgid; /* Default gid for reserved blocks */ | 1086 | __le16 s_def_resgid; /* Default gid for reserved blocks */ |
1080 | /* | 1087 | /* |
1081 | * These fields are for EXT4_DYNAMIC_REV superblocks only. | 1088 | * These fields are for EXT4_DYNAMIC_REV superblocks only. |
1082 | * | 1089 | * |
1083 | * Note: the difference between the compatible feature set and | 1090 | * Note: the difference between the compatible feature set and |
1084 | * the incompatible feature set is that if there is a bit set | 1091 | * the incompatible feature set is that if there is a bit set |
1085 | * in the incompatible feature set that the kernel doesn't | 1092 | * in the incompatible feature set that the kernel doesn't |
1086 | * know about, it should refuse to mount the filesystem. | 1093 | * know about, it should refuse to mount the filesystem. |
1087 | * | 1094 | * |
1088 | * e2fsck's requirements are more strict; if it doesn't know | 1095 | * e2fsck's requirements are more strict; if it doesn't know |
1089 | * about a feature in either the compatible or incompatible | 1096 | * about a feature in either the compatible or incompatible |
1090 | * feature set, it must abort and not try to meddle with | 1097 | * feature set, it must abort and not try to meddle with |
1091 | * things it doesn't understand... | 1098 | * things it doesn't understand... |
1092 | */ | 1099 | */ |
1093 | __le32 s_first_ino; /* First non-reserved inode */ | 1100 | __le32 s_first_ino; /* First non-reserved inode */ |
1094 | __le16 s_inode_size; /* size of inode structure */ | 1101 | __le16 s_inode_size; /* size of inode structure */ |
1095 | __le16 s_block_group_nr; /* block group # of this superblock */ | 1102 | __le16 s_block_group_nr; /* block group # of this superblock */ |
1096 | __le32 s_feature_compat; /* compatible feature set */ | 1103 | __le32 s_feature_compat; /* compatible feature set */ |
1097 | /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ | 1104 | /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ |
1098 | __le32 s_feature_ro_compat; /* readonly-compatible feature set */ | 1105 | __le32 s_feature_ro_compat; /* readonly-compatible feature set */ |
1099 | /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ | 1106 | /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ |
1100 | /*78*/ char s_volume_name[16]; /* volume name */ | 1107 | /*78*/ char s_volume_name[16]; /* volume name */ |
1101 | /*88*/ char s_last_mounted[64]; /* directory where last mounted */ | 1108 | /*88*/ char s_last_mounted[64]; /* directory where last mounted */ |
1102 | /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ | 1109 | /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ |
1103 | /* | 1110 | /* |
1104 | * Performance hints. Directory preallocation should only | 1111 | * Performance hints. Directory preallocation should only |
1105 | * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. | 1112 | * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. |
1106 | */ | 1113 | */ |
1107 | __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ | 1114 | __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ |
1108 | __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ | 1115 | __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ |
1109 | __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ | 1116 | __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ |
1110 | /* | 1117 | /* |
1111 | * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. | 1118 | * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. |
1112 | */ | 1119 | */ |
1113 | /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ | 1120 | /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ |
1114 | /*E0*/ __le32 s_journal_inum; /* inode number of journal file */ | 1121 | /*E0*/ __le32 s_journal_inum; /* inode number of journal file */ |
1115 | __le32 s_journal_dev; /* device number of journal file */ | 1122 | __le32 s_journal_dev; /* device number of journal file */ |
1116 | __le32 s_last_orphan; /* start of list of inodes to delete */ | 1123 | __le32 s_last_orphan; /* start of list of inodes to delete */ |
1117 | __le32 s_hash_seed[4]; /* HTREE hash seed */ | 1124 | __le32 s_hash_seed[4]; /* HTREE hash seed */ |
1118 | __u8 s_def_hash_version; /* Default hash version to use */ | 1125 | __u8 s_def_hash_version; /* Default hash version to use */ |
1119 | __u8 s_jnl_backup_type; | 1126 | __u8 s_jnl_backup_type; |
1120 | __le16 s_desc_size; /* size of group descriptor */ | 1127 | __le16 s_desc_size; /* size of group descriptor */ |
1121 | /*100*/ __le32 s_default_mount_opts; | 1128 | /*100*/ __le32 s_default_mount_opts; |
1122 | __le32 s_first_meta_bg; /* First metablock block group */ | 1129 | __le32 s_first_meta_bg; /* First metablock block group */ |
1123 | __le32 s_mkfs_time; /* When the filesystem was created */ | 1130 | __le32 s_mkfs_time; /* When the filesystem was created */ |
1124 | __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ | 1131 | __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ |
1125 | /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ | 1132 | /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ |
1126 | /*150*/ __le32 s_blocks_count_hi; /* Blocks count */ | 1133 | /*150*/ __le32 s_blocks_count_hi; /* Blocks count */ |
1127 | __le32 s_r_blocks_count_hi; /* Reserved blocks count */ | 1134 | __le32 s_r_blocks_count_hi; /* Reserved blocks count */ |
1128 | __le32 s_free_blocks_count_hi; /* Free blocks count */ | 1135 | __le32 s_free_blocks_count_hi; /* Free blocks count */ |
1129 | __le16 s_min_extra_isize; /* All inodes have at least # bytes */ | 1136 | __le16 s_min_extra_isize; /* All inodes have at least # bytes */ |
1130 | __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ | 1137 | __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ |
1131 | __le32 s_flags; /* Miscellaneous flags */ | 1138 | __le32 s_flags; /* Miscellaneous flags */ |
1132 | __le16 s_raid_stride; /* RAID stride */ | 1139 | __le16 s_raid_stride; /* RAID stride */ |
1133 | __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ | 1140 | __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ |
1134 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 1141 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
1135 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 1142 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
1136 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ | 1143 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
1137 | __u8 s_checksum_type; /* metadata checksum algorithm used */ | 1144 | __u8 s_checksum_type; /* metadata checksum algorithm used */ |
1138 | __le16 s_reserved_pad; | 1145 | __le16 s_reserved_pad; |
1139 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ | 1146 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ |
1140 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ | 1147 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ |
1141 | __le32 s_snapshot_id; /* sequential ID of active snapshot */ | 1148 | __le32 s_snapshot_id; /* sequential ID of active snapshot */ |
1142 | __le64 s_snapshot_r_blocks_count; /* reserved blocks for active | 1149 | __le64 s_snapshot_r_blocks_count; /* reserved blocks for active |
1143 | snapshot's future use */ | 1150 | snapshot's future use */ |
1144 | __le32 s_snapshot_list; /* inode number of the head of the | 1151 | __le32 s_snapshot_list; /* inode number of the head of the |
1145 | on-disk snapshot list */ | 1152 | on-disk snapshot list */ |
1146 | #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count) | 1153 | #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count) |
1147 | __le32 s_error_count; /* number of fs errors */ | 1154 | __le32 s_error_count; /* number of fs errors */ |
1148 | __le32 s_first_error_time; /* first time an error happened */ | 1155 | __le32 s_first_error_time; /* first time an error happened */ |
1149 | __le32 s_first_error_ino; /* inode involved in first error */ | 1156 | __le32 s_first_error_ino; /* inode involved in first error */ |
1150 | __le64 s_first_error_block; /* block involved of first error */ | 1157 | __le64 s_first_error_block; /* block involved of first error */ |
1151 | __u8 s_first_error_func[32]; /* function where the error happened */ | 1158 | __u8 s_first_error_func[32]; /* function where the error happened */ |
1152 | __le32 s_first_error_line; /* line number where error happened */ | 1159 | __le32 s_first_error_line; /* line number where error happened */ |
1153 | __le32 s_last_error_time; /* most recent time of an error */ | 1160 | __le32 s_last_error_time; /* most recent time of an error */ |
1154 | __le32 s_last_error_ino; /* inode involved in last error */ | 1161 | __le32 s_last_error_ino; /* inode involved in last error */ |
1155 | __le32 s_last_error_line; /* line number where error happened */ | 1162 | __le32 s_last_error_line; /* line number where error happened */ |
1156 | __le64 s_last_error_block; /* block involved of last error */ | 1163 | __le64 s_last_error_block; /* block involved of last error */ |
1157 | __u8 s_last_error_func[32]; /* function where the error happened */ | 1164 | __u8 s_last_error_func[32]; /* function where the error happened */ |
1158 | #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) | 1165 | #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) |
1159 | __u8 s_mount_opts[64]; | 1166 | __u8 s_mount_opts[64]; |
1160 | __le32 s_usr_quota_inum; /* inode for tracking user quota */ | 1167 | __le32 s_usr_quota_inum; /* inode for tracking user quota */ |
1161 | __le32 s_grp_quota_inum; /* inode for tracking group quota */ | 1168 | __le32 s_grp_quota_inum; /* inode for tracking group quota */ |
1162 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ | 1169 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ |
1163 | __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ | 1170 | __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ |
1164 | __le32 s_reserved[106]; /* Padding to the end of the block */ | 1171 | __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ |
1172 | __le32 s_reserved[105]; /* Padding to the end of the block */ | ||
1165 | __le32 s_checksum; /* crc32c(superblock) */ | 1173 | __le32 s_checksum; /* crc32c(superblock) */ |
1166 | }; | 1174 | }; |
1167 | 1175 | ||
1168 | #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) | 1176 | #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) |
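The compat/incompat/ro_compat rules documented inside the struct above become concrete at mount time. A condensed sketch of that policy (the real check is ext4_feature_set_ok() in fs/ext4/super.c; the EXT4_HAS_*_FEATURE macros and *_SUPP masks are defined elsewhere in this header):

    static int feature_set_ok_sketch(struct super_block *sb, int readonly)
    {
            /* Any unknown incompat bit: refuse to mount at all. */
            if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP))
                    return 0;
            if (readonly)
                    return 1;
            /* Any unknown ro_compat bit: allow read-only mounts only. */
            if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP))
                    return 0;
            return 1;
    }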
1169 | 1177 | ||
1170 | #ifdef __KERNEL__ | 1178 | #ifdef __KERNEL__ |
1171 | 1179 | ||
1172 | /* | 1180 | /* |
1173 | * run-time mount flags | 1181 | * run-time mount flags |
1174 | */ | 1182 | */ |
1175 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 | 1183 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 |
1176 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ | 1184 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ |
1177 | 1185 | ||
1178 | /* Number of quota types we support */ | 1186 | /* Number of quota types we support */ |
1179 | #define EXT4_MAXQUOTAS 2 | 1187 | #define EXT4_MAXQUOTAS 2 |
1180 | 1188 | ||
1181 | /* | 1189 | /* |
1182 | * fourth extended-fs super-block data in memory | 1190 | * fourth extended-fs super-block data in memory |
1183 | */ | 1191 | */ |
1184 | struct ext4_sb_info { | 1192 | struct ext4_sb_info { |
1185 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ | 1193 | unsigned long s_desc_size; /* Size of a group descriptor in bytes */ |
1186 | unsigned long s_inodes_per_block;/* Number of inodes per block */ | 1194 | unsigned long s_inodes_per_block;/* Number of inodes per block */ |
1187 | unsigned long s_blocks_per_group;/* Number of blocks in a group */ | 1195 | unsigned long s_blocks_per_group;/* Number of blocks in a group */ |
1188 | unsigned long s_clusters_per_group; /* Number of clusters in a group */ | 1196 | unsigned long s_clusters_per_group; /* Number of clusters in a group */ |
1189 | unsigned long s_inodes_per_group;/* Number of inodes in a group */ | 1197 | unsigned long s_inodes_per_group;/* Number of inodes in a group */ |
1190 | unsigned long s_itb_per_group; /* Number of inode table blocks per group */ | 1198 | unsigned long s_itb_per_group; /* Number of inode table blocks per group */ |
1191 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ | 1199 | unsigned long s_gdb_count; /* Number of group descriptor blocks */ |
1192 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ | 1200 | unsigned long s_desc_per_block; /* Number of group descriptors per block */ |
1193 | ext4_group_t s_groups_count; /* Number of groups in the fs */ | 1201 | ext4_group_t s_groups_count; /* Number of groups in the fs */ |
1194 | ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ | 1202 | ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ |
1195 | unsigned long s_overhead; /* # of fs overhead clusters */ | 1203 | unsigned long s_overhead; /* # of fs overhead clusters */ |
1196 | unsigned int s_cluster_ratio; /* Number of blocks per cluster */ | 1204 | unsigned int s_cluster_ratio; /* Number of blocks per cluster */ |
1197 | unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */ | 1205 | unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */ |
1198 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 1206 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
1199 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 1207 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
1200 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ | 1208 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
1201 | struct buffer_head **s_group_desc; | 1209 | struct buffer_head **s_group_desc; |
1202 | unsigned int s_mount_opt; | 1210 | unsigned int s_mount_opt; |
1203 | unsigned int s_mount_opt2; | 1211 | unsigned int s_mount_opt2; |
1204 | unsigned int s_mount_flags; | 1212 | unsigned int s_mount_flags; |
1205 | unsigned int s_def_mount_opt; | 1213 | unsigned int s_def_mount_opt; |
1206 | ext4_fsblk_t s_sb_block; | 1214 | ext4_fsblk_t s_sb_block; |
1207 | atomic64_t s_resv_clusters; | 1215 | atomic64_t s_resv_clusters; |
1208 | kuid_t s_resuid; | 1216 | kuid_t s_resuid; |
1209 | kgid_t s_resgid; | 1217 | kgid_t s_resgid; |
1210 | unsigned short s_mount_state; | 1218 | unsigned short s_mount_state; |
1211 | unsigned short s_pad; | 1219 | unsigned short s_pad; |
1212 | int s_addr_per_block_bits; | 1220 | int s_addr_per_block_bits; |
1213 | int s_desc_per_block_bits; | 1221 | int s_desc_per_block_bits; |
1214 | int s_inode_size; | 1222 | int s_inode_size; |
1215 | int s_first_ino; | 1223 | int s_first_ino; |
1216 | unsigned int s_inode_readahead_blks; | 1224 | unsigned int s_inode_readahead_blks; |
1217 | unsigned int s_inode_goal; | 1225 | unsigned int s_inode_goal; |
1218 | spinlock_t s_next_gen_lock; | 1226 | spinlock_t s_next_gen_lock; |
1219 | u32 s_next_generation; | 1227 | u32 s_next_generation; |
1220 | u32 s_hash_seed[4]; | 1228 | u32 s_hash_seed[4]; |
1221 | int s_def_hash_version; | 1229 | int s_def_hash_version; |
1222 | int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ | 1230 | int s_hash_unsigned; /* 3 if hash should be unsigned, 0 if not */ |
1223 | struct percpu_counter s_freeclusters_counter; | 1231 | struct percpu_counter s_freeclusters_counter; |
1224 | struct percpu_counter s_freeinodes_counter; | 1232 | struct percpu_counter s_freeinodes_counter; |
1225 | struct percpu_counter s_dirs_counter; | 1233 | struct percpu_counter s_dirs_counter; |
1226 | struct percpu_counter s_dirtyclusters_counter; | 1234 | struct percpu_counter s_dirtyclusters_counter; |
1227 | struct blockgroup_lock *s_blockgroup_lock; | 1235 | struct blockgroup_lock *s_blockgroup_lock; |
1228 | struct proc_dir_entry *s_proc; | 1236 | struct proc_dir_entry *s_proc; |
1229 | struct kobject s_kobj; | 1237 | struct kobject s_kobj; |
1230 | struct completion s_kobj_unregister; | 1238 | struct completion s_kobj_unregister; |
1231 | struct super_block *s_sb; | 1239 | struct super_block *s_sb; |
1232 | 1240 | ||
1233 | /* Journaling */ | 1241 | /* Journaling */ |
1234 | struct journal_s *s_journal; | 1242 | struct journal_s *s_journal; |
1235 | struct list_head s_orphan; | 1243 | struct list_head s_orphan; |
1236 | struct mutex s_orphan_lock; | 1244 | struct mutex s_orphan_lock; |
1237 | unsigned long s_resize_flags; /* Flags indicating if there | 1245 | unsigned long s_resize_flags; /* Flags indicating if there |
1238 | is a resizer */ | 1246 | is a resizer */ |
1239 | unsigned long s_commit_interval; | 1247 | unsigned long s_commit_interval; |
1240 | u32 s_max_batch_time; | 1248 | u32 s_max_batch_time; |
1241 | u32 s_min_batch_time; | 1249 | u32 s_min_batch_time; |
1242 | struct block_device *journal_bdev; | 1250 | struct block_device *journal_bdev; |
1243 | #ifdef CONFIG_QUOTA | 1251 | #ifdef CONFIG_QUOTA |
1244 | char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ | 1252 | char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ |
1245 | int s_jquota_fmt; /* Format of quota to use */ | 1253 | int s_jquota_fmt; /* Format of quota to use */ |
1246 | #endif | 1254 | #endif |
1247 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ | 1255 | unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ |
1248 | struct rb_root system_blks; | 1256 | struct rb_root system_blks; |
1249 | 1257 | ||
1250 | #ifdef EXTENTS_STATS | 1258 | #ifdef EXTENTS_STATS |
1251 | /* ext4 extents stats */ | 1259 | /* ext4 extents stats */ |
1252 | unsigned long s_ext_min; | 1260 | unsigned long s_ext_min; |
1253 | unsigned long s_ext_max; | 1261 | unsigned long s_ext_max; |
1254 | unsigned long s_depth_max; | 1262 | unsigned long s_depth_max; |
1255 | spinlock_t s_ext_stats_lock; | 1263 | spinlock_t s_ext_stats_lock; |
1256 | unsigned long s_ext_blocks; | 1264 | unsigned long s_ext_blocks; |
1257 | unsigned long s_ext_extents; | 1265 | unsigned long s_ext_extents; |
1258 | #endif | 1266 | #endif |
1259 | 1267 | ||
1260 | /* for buddy allocator */ | 1268 | /* for buddy allocator */ |
1261 | struct ext4_group_info ***s_group_info; | 1269 | struct ext4_group_info ***s_group_info; |
1262 | struct inode *s_buddy_cache; | 1270 | struct inode *s_buddy_cache; |
1263 | spinlock_t s_md_lock; | 1271 | spinlock_t s_md_lock; |
1264 | unsigned short *s_mb_offsets; | 1272 | unsigned short *s_mb_offsets; |
1265 | unsigned int *s_mb_maxs; | 1273 | unsigned int *s_mb_maxs; |
1266 | unsigned int s_group_info_size; | 1274 | unsigned int s_group_info_size; |
1267 | 1275 | ||
1268 | /* tunables */ | 1276 | /* tunables */ |
1269 | unsigned long s_stripe; | 1277 | unsigned long s_stripe; |
1270 | unsigned int s_mb_stream_request; | 1278 | unsigned int s_mb_stream_request; |
1271 | unsigned int s_mb_max_to_scan; | 1279 | unsigned int s_mb_max_to_scan; |
1272 | unsigned int s_mb_min_to_scan; | 1280 | unsigned int s_mb_min_to_scan; |
1273 | unsigned int s_mb_stats; | 1281 | unsigned int s_mb_stats; |
1274 | unsigned int s_mb_order2_reqs; | 1282 | unsigned int s_mb_order2_reqs; |
1275 | unsigned int s_mb_group_prealloc; | 1283 | unsigned int s_mb_group_prealloc; |
1276 | unsigned int s_max_dir_size_kb; | 1284 | unsigned int s_max_dir_size_kb; |
1277 | /* where last allocation was done - for stream allocation */ | 1285 | /* where last allocation was done - for stream allocation */ |
1278 | unsigned long s_mb_last_group; | 1286 | unsigned long s_mb_last_group; |
1279 | unsigned long s_mb_last_start; | 1287 | unsigned long s_mb_last_start; |
1280 | 1288 | ||
1281 | /* stats for buddy allocator */ | 1289 | /* stats for buddy allocator */ |
1282 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | 1290 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ |
1283 | atomic_t s_bal_success; /* we found long enough chunks */ | 1291 | atomic_t s_bal_success; /* we found long enough chunks */ |
1284 | atomic_t s_bal_allocated; /* in blocks */ | 1292 | atomic_t s_bal_allocated; /* in blocks */ |
1285 | atomic_t s_bal_ex_scanned; /* total extents scanned */ | 1293 | atomic_t s_bal_ex_scanned; /* total extents scanned */ |
1286 | atomic_t s_bal_goals; /* goal hits */ | 1294 | atomic_t s_bal_goals; /* goal hits */ |
1287 | atomic_t s_bal_breaks; /* too long searches */ | 1295 | atomic_t s_bal_breaks; /* too long searches */ |
1288 | atomic_t s_bal_2orders; /* 2^order hits */ | 1296 | atomic_t s_bal_2orders; /* 2^order hits */ |
1289 | spinlock_t s_bal_lock; | 1297 | spinlock_t s_bal_lock; |
1290 | unsigned long s_mb_buddies_generated; | 1298 | unsigned long s_mb_buddies_generated; |
1291 | unsigned long long s_mb_generation_time; | 1299 | unsigned long long s_mb_generation_time; |
1292 | atomic_t s_mb_lost_chunks; | 1300 | atomic_t s_mb_lost_chunks; |
1293 | atomic_t s_mb_preallocated; | 1301 | atomic_t s_mb_preallocated; |
1294 | atomic_t s_mb_discarded; | 1302 | atomic_t s_mb_discarded; |
1295 | atomic_t s_lock_busy; | 1303 | atomic_t s_lock_busy; |
1296 | 1304 | ||
1297 | /* locality groups */ | 1305 | /* locality groups */ |
1298 | struct ext4_locality_group __percpu *s_locality_groups; | 1306 | struct ext4_locality_group __percpu *s_locality_groups; |
1299 | 1307 | ||
1300 | /* for write statistics */ | 1308 | /* for write statistics */ |
1301 | unsigned long s_sectors_written_start; | 1309 | unsigned long s_sectors_written_start; |
1302 | u64 s_kbytes_written; | 1310 | u64 s_kbytes_written; |
1303 | 1311 | ||
1304 | /* the size of zero-out chunk */ | 1312 | /* the size of zero-out chunk */ |
1305 | unsigned int s_extent_max_zeroout_kb; | 1313 | unsigned int s_extent_max_zeroout_kb; |
1306 | 1314 | ||
1307 | unsigned int s_log_groups_per_flex; | 1315 | unsigned int s_log_groups_per_flex; |
1308 | struct flex_groups *s_flex_groups; | 1316 | struct flex_groups *s_flex_groups; |
1309 | ext4_group_t s_flex_groups_allocated; | 1317 | ext4_group_t s_flex_groups_allocated; |
1310 | 1318 | ||
1311 | /* workqueue for reserved extent conversions (buffered io) */ | 1319 | /* workqueue for reserved extent conversions (buffered io) */ |
1312 | struct workqueue_struct *rsv_conversion_wq; | 1320 | struct workqueue_struct *rsv_conversion_wq; |
1313 | 1321 | ||
1314 | /* timer for periodic error stats printing */ | 1322 | /* timer for periodic error stats printing */ |
1315 | struct timer_list s_err_report; | 1323 | struct timer_list s_err_report; |
1316 | 1324 | ||
1317 | /* Lazy inode table initialization info */ | 1325 | /* Lazy inode table initialization info */ |
1318 | struct ext4_li_request *s_li_request; | 1326 | struct ext4_li_request *s_li_request; |
1319 | /* Wait multiplier for lazy initialization thread */ | 1327 | /* Wait multiplier for lazy initialization thread */ |
1320 | unsigned int s_li_wait_mult; | 1328 | unsigned int s_li_wait_mult; |
1321 | 1329 | ||
1322 | /* Kernel thread for multiple mount protection */ | 1330 | /* Kernel thread for multiple mount protection */ |
1323 | struct task_struct *s_mmp_tsk; | 1331 | struct task_struct *s_mmp_tsk; |
1324 | 1332 | ||
1325 | /* record the last minlen when FITRIM is called. */ | 1333 | /* record the last minlen when FITRIM is called. */ |
1326 | atomic_t s_last_trim_minblks; | 1334 | atomic_t s_last_trim_minblks; |
1327 | 1335 | ||
1328 | /* Reference to checksum algorithm driver via cryptoapi */ | 1336 | /* Reference to checksum algorithm driver via cryptoapi */ |
1329 | struct crypto_shash *s_chksum_driver; | 1337 | struct crypto_shash *s_chksum_driver; |
1330 | 1338 | ||
1331 | /* Precomputed FS UUID checksum for seeding other checksums */ | 1339 | /* Precomputed FS UUID checksum for seeding other checksums */ |
1332 | __u32 s_csum_seed; | 1340 | __u32 s_csum_seed; |
1333 | 1341 | ||
1334 | /* Reclaim extents from extent status tree */ | 1342 | /* Reclaim extents from extent status tree */ |
1335 | struct shrinker s_es_shrinker; | 1343 | struct shrinker s_es_shrinker; |
1336 | struct list_head s_es_list; /* List of inodes with reclaimable extents */ | 1344 | struct list_head s_es_list; /* List of inodes with reclaimable extents */ |
1337 | long s_es_nr_inode; | 1345 | long s_es_nr_inode; |
1338 | struct ext4_es_stats s_es_stats; | 1346 | struct ext4_es_stats s_es_stats; |
1339 | struct mb_cache *s_mb_cache; | 1347 | struct mb_cache *s_mb_cache; |
1340 | spinlock_t s_es_lock ____cacheline_aligned_in_smp; | 1348 | spinlock_t s_es_lock ____cacheline_aligned_in_smp; |
1341 | 1349 | ||
1342 | /* Ratelimit ext4 messages. */ | 1350 | /* Ratelimit ext4 messages. */ |
1343 | struct ratelimit_state s_err_ratelimit_state; | 1351 | struct ratelimit_state s_err_ratelimit_state; |
1344 | struct ratelimit_state s_warning_ratelimit_state; | 1352 | struct ratelimit_state s_warning_ratelimit_state; |
1345 | struct ratelimit_state s_msg_ratelimit_state; | 1353 | struct ratelimit_state s_msg_ratelimit_state; |
1346 | }; | 1354 | }; |
1347 | 1355 | ||
1348 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1356 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
1349 | { | 1357 | { |
1350 | return sb->s_fs_info; | 1358 | return sb->s_fs_info; |
1351 | } | 1359 | } |
1352 | static inline struct ext4_inode_info *EXT4_I(struct inode *inode) | 1360 | static inline struct ext4_inode_info *EXT4_I(struct inode *inode) |
1353 | { | 1361 | { |
1354 | return container_of(inode, struct ext4_inode_info, vfs_inode); | 1362 | return container_of(inode, struct ext4_inode_info, vfs_inode); |
1355 | } | 1363 | } |
1356 | 1364 | ||
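/*
 * Usage sketch: vfs_inode is embedded inside struct ext4_inode_info,
 * so EXT4_I() is a constant-offset container_of() with no lookup and
 * no failure path.  A typical (hypothetical) caller:
 *
 *	struct ext4_inode_info *ei = EXT4_I(inode);
 *	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 *	loff_t disksize = ei->i_disksize;		-- per-inode state
 *	ext4_group_t ngroups = sbi->s_groups_count;	-- per-fs state
 */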
1357 | static inline struct timespec ext4_current_time(struct inode *inode) | 1365 | static inline struct timespec ext4_current_time(struct inode *inode) |
1358 | { | 1366 | { |
1359 | return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? | 1367 | return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? |
1360 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 1368 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
1361 | } | 1369 | } |
1362 | 1370 | ||
1363 | static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | 1371 | static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) |
1364 | { | 1372 | { |
1365 | return ino == EXT4_ROOT_INO || | 1373 | return ino == EXT4_ROOT_INO || |
1366 | ino == EXT4_USR_QUOTA_INO || | 1374 | ino == EXT4_USR_QUOTA_INO || |
1367 | ino == EXT4_GRP_QUOTA_INO || | 1375 | ino == EXT4_GRP_QUOTA_INO || |
1368 | ino == EXT4_BOOT_LOADER_INO || | 1376 | ino == EXT4_BOOT_LOADER_INO || |
1369 | ino == EXT4_JOURNAL_INO || | 1377 | ino == EXT4_JOURNAL_INO || |
1370 | ino == EXT4_RESIZE_INO || | 1378 | ino == EXT4_RESIZE_INO || |
1371 | (ino >= EXT4_FIRST_INO(sb) && | 1379 | (ino >= EXT4_FIRST_INO(sb) && |
1372 | ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); | 1380 | ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); |
1373 | } | 1381 | } |
1374 | 1382 | ||
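/*
 * Worked example, assuming the usual mke2fs layout where
 * EXT4_FIRST_INO(sb) == 11:
 *
 *	ext4_valid_inum(sb, 2)  -> 1	(EXT4_ROOT_INO)
 *	ext4_valid_inum(sb, 8)  -> 1	(EXT4_JOURNAL_INO)
 *	ext4_valid_inum(sb, 9)  -> 0	(reserved, but not listed above)
 *	ext4_valid_inum(sb, 11) -> 1	(first ordinary inode)
 */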
1375 | static inline void ext4_set_io_unwritten_flag(struct inode *inode, | 1383 | static inline void ext4_set_io_unwritten_flag(struct inode *inode, |
1376 | struct ext4_io_end *io_end) | 1384 | struct ext4_io_end *io_end) |
1377 | { | 1385 | { |
1378 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 1386 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
1379 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 1387 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
1380 | atomic_inc(&EXT4_I(inode)->i_unwritten); | 1388 | atomic_inc(&EXT4_I(inode)->i_unwritten); |
1381 | } | 1389 | } |
1382 | } | 1390 | } |
1383 | 1391 | ||
1384 | static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) | 1392 | static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) |
1385 | { | 1393 | { |
1386 | return inode->i_private; | 1394 | return inode->i_private; |
1387 | } | 1395 | } |
1388 | 1396 | ||
1389 | static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) | 1397 | static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io) |
1390 | { | 1398 | { |
1391 | inode->i_private = io; | 1399 | inode->i_private = io; |
1392 | } | 1400 | } |
1393 | 1401 | ||
1394 | /* | 1402 | /* |
1395 | * Inode dynamic state flags | 1403 | * Inode dynamic state flags |
1396 | */ | 1404 | */ |
1397 | enum { | 1405 | enum { |
1398 | EXT4_STATE_JDATA, /* journaled data exists */ | 1406 | EXT4_STATE_JDATA, /* journaled data exists */ |
1399 | EXT4_STATE_NEW, /* inode is newly created */ | 1407 | EXT4_STATE_NEW, /* inode is newly created */ |
1400 | EXT4_STATE_XATTR, /* has in-inode xattrs */ | 1408 | EXT4_STATE_XATTR, /* has in-inode xattrs */ |
1401 | EXT4_STATE_NO_EXPAND, /* No space for expansion */ | 1409 | EXT4_STATE_NO_EXPAND, /* No space for expansion */ |
1402 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ | 1410 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ |
1403 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ | 1411 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ |
1404 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1412 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1405 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | 1413 | EXT4_STATE_NEWENTRY, /* File just added to dir */ |
1406 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read | 1414 | EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read |
1407 | nolocking */ | 1415 | nolocking */ |
1408 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ | 1416 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ |
1409 | EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ | 1417 | EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ |
1410 | EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ | 1418 | EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ |
1411 | }; | 1419 | }; |
1412 | 1420 | ||
1413 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ | 1421 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ |
1414 | static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ | 1422 | static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ |
1415 | { \ | 1423 | { \ |
1416 | return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ | 1424 | return test_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ |
1417 | } \ | 1425 | } \ |
1418 | static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ | 1426 | static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ |
1419 | { \ | 1427 | { \ |
1420 | set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ | 1428 | set_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ |
1421 | } \ | 1429 | } \ |
1422 | static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ | 1430 | static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ |
1423 | { \ | 1431 | { \ |
1424 | clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ | 1432 | clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ |
1425 | } | 1433 | } |
1426 | 1434 | ||
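/*
 * Expansion sketch: each EXT4_INODE_BIT_FNS(name, field, offset)
 * instantiation below generates an atomic test/set/clear triple over
 * EXT4_I(inode)->i_<field>, shifted by <offset>.  For example,
 *
 *	ext4_set_inode_flag(inode, EXT4_INODE_INDEX);
 *
 * is an atomic set_bit(EXT4_INODE_INDEX, &EXT4_I(inode)->i_flags);
 * on 64-bit builds the state variant packs its bits into the upper
 * 32 bits of the same word (offset 32).
 */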
1427 | /* Add these declarations here only so that these functions can be | 1435 | /* Add these declarations here only so that these functions can be |
1428 | * found by name. Otherwise, they are very hard to locate. */ | 1436 | * found by name. Otherwise, they are very hard to locate. */ |
1429 | static inline int ext4_test_inode_flag(struct inode *inode, int bit); | 1437 | static inline int ext4_test_inode_flag(struct inode *inode, int bit); |
1430 | static inline void ext4_set_inode_flag(struct inode *inode, int bit); | 1438 | static inline void ext4_set_inode_flag(struct inode *inode, int bit); |
1431 | static inline void ext4_clear_inode_flag(struct inode *inode, int bit); | 1439 | static inline void ext4_clear_inode_flag(struct inode *inode, int bit); |
1432 | EXT4_INODE_BIT_FNS(flag, flags, 0) | 1440 | EXT4_INODE_BIT_FNS(flag, flags, 0) |
1433 | 1441 | ||
1434 | /* Add these declarations here only so that these functions can be | 1442 | /* Add these declarations here only so that these functions can be |
1435 | * found by name. Otherwise, they are very hard to locate. */ | 1443 | * found by name. Otherwise, they are very hard to locate. */ |
1436 | static inline int ext4_test_inode_state(struct inode *inode, int bit); | 1444 | static inline int ext4_test_inode_state(struct inode *inode, int bit); |
1437 | static inline void ext4_set_inode_state(struct inode *inode, int bit); | 1445 | static inline void ext4_set_inode_state(struct inode *inode, int bit); |
1438 | static inline void ext4_clear_inode_state(struct inode *inode, int bit); | 1446 | static inline void ext4_clear_inode_state(struct inode *inode, int bit); |
1439 | #if (BITS_PER_LONG < 64) | 1447 | #if (BITS_PER_LONG < 64) |
1440 | EXT4_INODE_BIT_FNS(state, state_flags, 0) | 1448 | EXT4_INODE_BIT_FNS(state, state_flags, 0) |
1441 | 1449 | ||
1442 | static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | 1450 | static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) |
1443 | { | 1451 | { |
1444 | (ei)->i_state_flags = 0; | 1452 | (ei)->i_state_flags = 0; |
1445 | } | 1453 | } |
1446 | #else | 1454 | #else |
1447 | EXT4_INODE_BIT_FNS(state, flags, 32) | 1455 | EXT4_INODE_BIT_FNS(state, flags, 32) |
1448 | 1456 | ||
1449 | static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | 1457 | static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) |
1450 | { | 1458 | { |
1451 | /* We depend on the fact that callers will set i_flags */ | 1459 | /* We depend on the fact that callers will set i_flags */ |
1452 | } | 1460 | } |
1453 | #endif | 1461 | #endif |
1454 | #else | 1462 | #else |
1455 | /* Assume that user mode programs are passing in an ext4fs superblock, not | 1463 | /* Assume that user mode programs are passing in an ext4fs superblock, not |
1456 | * a kernel struct super_block. This will allow us to call the feature-test | 1464 | * a kernel struct super_block. This will allow us to call the feature-test |
1457 | * macros from user land. */ | 1465 | * macros from user land. */ |
1458 | #define EXT4_SB(sb) (sb) | 1466 | #define EXT4_SB(sb) (sb) |
1459 | #endif | 1467 | #endif |
1460 | 1468 | ||
1461 | #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime | 1469 | #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime |
1462 | 1470 | ||
1463 | /* | 1471 | /* |
1464 | * Codes for operating systems | 1472 | * Codes for operating systems |
1465 | */ | 1473 | */ |
1466 | #define EXT4_OS_LINUX 0 | 1474 | #define EXT4_OS_LINUX 0 |
1467 | #define EXT4_OS_HURD 1 | 1475 | #define EXT4_OS_HURD 1 |
1468 | #define EXT4_OS_MASIX 2 | 1476 | #define EXT4_OS_MASIX 2 |
1469 | #define EXT4_OS_FREEBSD 3 | 1477 | #define EXT4_OS_FREEBSD 3 |
1470 | #define EXT4_OS_LITES 4 | 1478 | #define EXT4_OS_LITES 4 |
1471 | 1479 | ||
1472 | /* | 1480 | /* |
1473 | * Revision levels | 1481 | * Revision levels |
1474 | */ | 1482 | */ |
1475 | #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ | 1483 | #define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ |
1476 | #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ | 1484 | #define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ |
1477 | 1485 | ||
1478 | #define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV | 1486 | #define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV |
1479 | #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV | 1487 | #define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV |
1480 | 1488 | ||
1481 | #define EXT4_GOOD_OLD_INODE_SIZE 128 | 1489 | #define EXT4_GOOD_OLD_INODE_SIZE 128 |
1482 | 1490 | ||
1483 | /* | 1491 | /* |
1484 | * Feature set definitions | 1492 | * Feature set definitions |
1485 | */ | 1493 | */ |
1486 | 1494 | ||
1487 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ | 1495 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ |
1488 | ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0) | 1496 | ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0) |
1489 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ | 1497 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ |
1490 | ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0) | 1498 | ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0) |
1491 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ | 1499 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ |
1492 | ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0) | 1500 | ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0) |
1493 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ | 1501 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ |
1494 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) | 1502 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) |
1495 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ | 1503 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ |
1496 | EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) | 1504 | EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) |
1497 | #define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ | 1505 | #define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ |
1498 | EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) | 1506 | EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) |
1499 | #define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ | 1507 | #define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ |
1500 | EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) | 1508 | EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) |
1501 | #define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ | 1509 | #define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ |
1502 | EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) | 1510 | EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) |
1503 | #define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ | 1511 | #define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ |
1504 | EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) | 1512 | EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) |
1505 | 1513 | ||
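/*
 * Usage sketch (simplified from the mount path in super.c): an
 * unsupported RO_COMPAT bit, such as the READONLY bit reserved below,
 * downgrades the mount to read-only rather than failing it, roughly:
 *
 *	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY))
 *		sb->s_flags |= MS_RDONLY;
 */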
1506 | #define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 | 1514 | #define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 |
1507 | #define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 | 1515 | #define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 |
1508 | #define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 | 1516 | #define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 |
1509 | #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 | 1517 | #define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 |
1510 | #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 | 1518 | #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 |
1511 | #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 | 1519 | #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 |
1512 | #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 | 1520 | #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 |
1513 | 1521 | ||
1514 | #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 | 1522 | #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 |
1515 | #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 | 1523 | #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 |
1516 | #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 | 1524 | #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 |
1517 | #define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 | 1525 | #define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008 |
1518 | #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 | 1526 | #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 |
1519 | #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 | 1527 | #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 |
1520 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 | 1528 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 |
1521 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 | 1529 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 |
1522 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 | 1530 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 |
1523 | /* | 1531 | /* |
1524 | * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When | 1532 | * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When |
1525 | * METADATA_CSUM is set, group descriptor checksums use the same algorithm as | 1533 | * METADATA_CSUM is set, group descriptor checksums use the same algorithm as |
1526 | * all other data structures' checksums. However, the METADATA_CSUM and | 1534 | * all other data structures' checksums. However, the METADATA_CSUM and |
1527 | * GDT_CSUM bits are mutually exclusive. | 1535 | * GDT_CSUM bits are mutually exclusive. |
1528 | */ | 1536 | */ |
1529 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 | 1537 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 |
 | | 1538 | #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 |
1530 | 1539 | ||
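/*
 * A validity check following the rule above could look like this
 * (illustrative sketch only -- ext4_csum_features_consistent() is a
 * hypothetical helper, not part of this header):
 */
static inline int ext4_csum_features_consistent(struct super_block *sb)
{
	return !(EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
		 EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_GDT_CSUM));
}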
1531 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 | 1540 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 |
1532 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 | 1541 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 |
1533 | #define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ | 1542 | #define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ |
1534 | #define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ | 1543 | #define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ |
1535 | #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 | 1544 | #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 |
1536 | #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ | 1545 | #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ |
1537 | #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 | 1546 | #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 |
1538 | #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 | 1547 | #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 |
1539 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 | 1548 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 |
1540 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | 1549 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ |
1541 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | 1550 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ |
1542 | #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ | 1551 | #define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */ |
1543 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ | 1552 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ |
1544 | #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ | 1553 | #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ |
 | | 1554 | #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 |
1545 | 1555 | ||
1546 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | 1556 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1547 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1557 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
1548 | EXT4_FEATURE_INCOMPAT_META_BG) | 1558 | EXT4_FEATURE_INCOMPAT_META_BG) |
1549 | #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | 1559 | #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ |
1550 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | 1560 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ |
1551 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR) | 1561 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR) |
1552 | 1562 | ||
1553 | #define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | 1563 | #define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1554 | #define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1564 | #define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
1555 | EXT4_FEATURE_INCOMPAT_RECOVER| \ | 1565 | EXT4_FEATURE_INCOMPAT_RECOVER| \ |
1556 | EXT4_FEATURE_INCOMPAT_META_BG) | 1566 | EXT4_FEATURE_INCOMPAT_META_BG) |
1557 | #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | 1567 | #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ |
1558 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | 1568 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ |
1559 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR) | 1569 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR) |
1560 | 1570 | ||
1561 | #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR | 1571 | #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR |
1562 | #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1572 | #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
1563 | EXT4_FEATURE_INCOMPAT_RECOVER| \ | 1573 | EXT4_FEATURE_INCOMPAT_RECOVER| \ |
1564 | EXT4_FEATURE_INCOMPAT_META_BG| \ | 1574 | EXT4_FEATURE_INCOMPAT_META_BG| \ |
1565 | EXT4_FEATURE_INCOMPAT_EXTENTS| \ | 1575 | EXT4_FEATURE_INCOMPAT_EXTENTS| \ |
1566 | EXT4_FEATURE_INCOMPAT_64BIT| \ | 1576 | EXT4_FEATURE_INCOMPAT_64BIT| \ |
1567 | EXT4_FEATURE_INCOMPAT_FLEX_BG| \ | 1577 | EXT4_FEATURE_INCOMPAT_FLEX_BG| \ |
1568 | EXT4_FEATURE_INCOMPAT_MMP | \ | 1578 | EXT4_FEATURE_INCOMPAT_MMP | \ |
1569 | EXT4_FEATURE_INCOMPAT_INLINE_DATA) | 1579 | EXT4_FEATURE_INCOMPAT_INLINE_DATA) |
1570 | #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | 1580 | #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ |
1571 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | 1581 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ |
1572 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ | 1582 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ |
1573 | EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ | 1583 | EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ |
1574 | EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ | 1584 | EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ |
1575 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ | 1585 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ |
1576 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ | 1586 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ |
1577 | EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ | 1587 | EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ |
1578 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ | 1588 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ |
1579 | EXT4_FEATURE_RO_COMPAT_QUOTA) | 1589 | EXT4_FEATURE_RO_COMPAT_QUOTA) |
1580 | 1590 | ||
1581 | /* | 1591 | /* |
1582 | * Default values for user and/or group using reserved blocks | 1592 | * Default values for user and/or group using reserved blocks |
1583 | */ | 1593 | */ |
1584 | #define EXT4_DEF_RESUID 0 | 1594 | #define EXT4_DEF_RESUID 0 |
1585 | #define EXT4_DEF_RESGID 0 | 1595 | #define EXT4_DEF_RESGID 0 |
1586 | 1596 | ||
1587 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | 1597 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 |
1588 | 1598 | ||
1589 | /* | 1599 | /* |
1590 | * Default mount options | 1600 | * Default mount options |
1591 | */ | 1601 | */ |
1592 | #define EXT4_DEFM_DEBUG 0x0001 | 1602 | #define EXT4_DEFM_DEBUG 0x0001 |
1593 | #define EXT4_DEFM_BSDGROUPS 0x0002 | 1603 | #define EXT4_DEFM_BSDGROUPS 0x0002 |
1594 | #define EXT4_DEFM_XATTR_USER 0x0004 | 1604 | #define EXT4_DEFM_XATTR_USER 0x0004 |
1595 | #define EXT4_DEFM_ACL 0x0008 | 1605 | #define EXT4_DEFM_ACL 0x0008 |
1596 | #define EXT4_DEFM_UID16 0x0010 | 1606 | #define EXT4_DEFM_UID16 0x0010 |
1597 | #define EXT4_DEFM_JMODE 0x0060 | 1607 | #define EXT4_DEFM_JMODE 0x0060 |
1598 | #define EXT4_DEFM_JMODE_DATA 0x0020 | 1608 | #define EXT4_DEFM_JMODE_DATA 0x0020 |
1599 | #define EXT4_DEFM_JMODE_ORDERED 0x0040 | 1609 | #define EXT4_DEFM_JMODE_ORDERED 0x0040 |
1600 | #define EXT4_DEFM_JMODE_WBACK 0x0060 | 1610 | #define EXT4_DEFM_JMODE_WBACK 0x0060 |
1601 | #define EXT4_DEFM_NOBARRIER 0x0100 | 1611 | #define EXT4_DEFM_NOBARRIER 0x0100 |
1602 | #define EXT4_DEFM_BLOCK_VALIDITY 0x0200 | 1612 | #define EXT4_DEFM_BLOCK_VALIDITY 0x0200 |
1603 | #define EXT4_DEFM_DISCARD 0x0400 | 1613 | #define EXT4_DEFM_DISCARD 0x0400 |
1604 | #define EXT4_DEFM_NODELALLOC 0x0800 | 1614 | #define EXT4_DEFM_NODELALLOC 0x0800 |
1605 | 1615 | ||
1606 | /* | 1616 | /* |
1607 | * Default journal batch times | 1617 | * Default journal batch times |
1608 | */ | 1618 | */ |
1609 | #define EXT4_DEF_MIN_BATCH_TIME 0 | 1619 | #define EXT4_DEF_MIN_BATCH_TIME 0 |
1610 | #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ | 1620 | #define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */ |
1611 | 1621 | ||
1612 | /* | 1622 | /* |
1613 | * Minimum number of groups in a flexgroup before we separate out | 1623 | * Minimum number of groups in a flexgroup before we separate out |
1614 | * directories into the first block group of a flexgroup | 1624 | * directories into the first block group of a flexgroup |
1615 | */ | 1625 | */ |
1616 | #define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4 | 1626 | #define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4 |
1617 | 1627 | ||
1618 | /* | 1628 | /* |
1619 | * Structure of a directory entry | 1629 | * Structure of a directory entry |
1620 | */ | 1630 | */ |
1621 | #define EXT4_NAME_LEN 255 | 1631 | #define EXT4_NAME_LEN 255 |
1622 | 1632 | ||
1623 | struct ext4_dir_entry { | 1633 | struct ext4_dir_entry { |
1624 | __le32 inode; /* Inode number */ | 1634 | __le32 inode; /* Inode number */ |
1625 | __le16 rec_len; /* Directory entry length */ | 1635 | __le16 rec_len; /* Directory entry length */ |
1626 | __le16 name_len; /* Name length */ | 1636 | __le16 name_len; /* Name length */ |
1627 | char name[EXT4_NAME_LEN]; /* File name */ | 1637 | char name[EXT4_NAME_LEN]; /* File name */ |
1628 | }; | 1638 | }; |
1629 | 1639 | ||
1630 | /* | 1640 | /* |
1631 | * The new version of the directory entry. Since EXT4 structures are | 1641 | * The new version of the directory entry. Since EXT4 structures are |
1632 | * stored in intel byte order, and the name_len field could never be | 1642 | * stored in intel byte order, and the name_len field could never be |
1633 | * bigger than 255 chars, it's safe to reclaim the extra byte for the | 1643 | * bigger than 255 chars, it's safe to reclaim the extra byte for the |
1634 | * file_type field. | 1644 | * file_type field. |
1635 | */ | 1645 | */ |
1636 | struct ext4_dir_entry_2 { | 1646 | struct ext4_dir_entry_2 { |
1637 | __le32 inode; /* Inode number */ | 1647 | __le32 inode; /* Inode number */ |
1638 | __le16 rec_len; /* Directory entry length */ | 1648 | __le16 rec_len; /* Directory entry length */ |
1639 | __u8 name_len; /* Name length */ | 1649 | __u8 name_len; /* Name length */ |
1640 | __u8 file_type; | 1650 | __u8 file_type; |
1641 | char name[EXT4_NAME_LEN]; /* File name */ | 1651 | char name[EXT4_NAME_LEN]; /* File name */ |
1642 | }; | 1652 | }; |
1643 | 1653 | ||
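/*
 * Layout check: shrinking name_len from __le16 to __u8 frees exactly
 * one byte for file_type, so both entry versions keep the same 8-byte
 * fixed header ahead of name[]:
 *
 *	offsetof(struct ext4_dir_entry_2, name_len)  == 6
 *	offsetof(struct ext4_dir_entry_2, file_type) == 7
 *	offsetof(struct ext4_dir_entry_2, name)      == 8
 */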
1644 | /* | 1654 | /* |
1645 | * This is a bogus directory entry at the end of each leaf block that | 1655 | * This is a bogus directory entry at the end of each leaf block that |
1646 | * records checksums. | 1656 | * records checksums. |
1647 | */ | 1657 | */ |
1648 | struct ext4_dir_entry_tail { | 1658 | struct ext4_dir_entry_tail { |
1649 | __le32 det_reserved_zero1; /* Pretend to be unused */ | 1659 | __le32 det_reserved_zero1; /* Pretend to be unused */ |
1650 | __le16 det_rec_len; /* 12 */ | 1660 | __le16 det_rec_len; /* 12 */ |
1651 | __u8 det_reserved_zero2; /* Zero name length */ | 1661 | __u8 det_reserved_zero2; /* Zero name length */ |
1652 | __u8 det_reserved_ft; /* 0xDE, fake file type */ | 1662 | __u8 det_reserved_ft; /* 0xDE, fake file type */ |
1653 | __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ | 1663 | __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ |
1654 | }; | 1664 | }; |
1655 | 1665 | ||
1656 | #define EXT4_DIRENT_TAIL(block, blocksize) \ | 1666 | #define EXT4_DIRENT_TAIL(block, blocksize) \ |
1657 | ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ | 1667 | ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ |
1658 | ((blocksize) - \ | 1668 | ((blocksize) - \ |
1659 | sizeof(struct ext4_dir_entry_tail)))) | 1669 | sizeof(struct ext4_dir_entry_tail)))) |
1660 | 1670 | ||
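/*
 * Usage sketch (modelled on initialize_dirent_tail() in namei.c): the
 * tail occupies the last 12 bytes of the block and masquerades as an
 * empty dirent so unaware tools skip it:
 *
 *	struct ext4_dir_entry_tail *t =
 *		EXT4_DIRENT_TAIL(bh->b_data, blocksize);
 *	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
 *	t->det_rec_len = ext4_rec_len_to_disk(
 *			sizeof(struct ext4_dir_entry_tail), blocksize);
 *	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
 */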
1661 | /* | 1671 | /* |
1662 | * Ext4 directory file types. Only the low 3 bits are used. The | 1672 | * Ext4 directory file types. Only the low 3 bits are used. The |
1663 | * other bits are reserved for now. | 1673 | * other bits are reserved for now. |
1664 | */ | 1674 | */ |
1665 | #define EXT4_FT_UNKNOWN 0 | 1675 | #define EXT4_FT_UNKNOWN 0 |
1666 | #define EXT4_FT_REG_FILE 1 | 1676 | #define EXT4_FT_REG_FILE 1 |
1667 | #define EXT4_FT_DIR 2 | 1677 | #define EXT4_FT_DIR 2 |
1668 | #define EXT4_FT_CHRDEV 3 | 1678 | #define EXT4_FT_CHRDEV 3 |
1669 | #define EXT4_FT_BLKDEV 4 | 1679 | #define EXT4_FT_BLKDEV 4 |
1670 | #define EXT4_FT_FIFO 5 | 1680 | #define EXT4_FT_FIFO 5 |
1671 | #define EXT4_FT_SOCK 6 | 1681 | #define EXT4_FT_SOCK 6 |
1672 | #define EXT4_FT_SYMLINK 7 | 1682 | #define EXT4_FT_SYMLINK 7 |
1673 | 1683 | ||
1674 | #define EXT4_FT_MAX 8 | 1684 | #define EXT4_FT_MAX 8 |
1675 | 1685 | ||
1676 | #define EXT4_FT_DIR_CSUM 0xDE | 1686 | #define EXT4_FT_DIR_CSUM 0xDE |
1677 | 1687 | ||
1678 | /* | 1688 | /* |
1679 | * EXT4_DIR_PAD defines the directory entries boundaries | 1689 | * EXT4_DIR_PAD defines the directory entries boundaries |
1680 | * | 1690 | * |
1681 | * NOTE: It must be a multiple of 4 | 1691 | * NOTE: It must be a multiple of 4 |
1682 | */ | 1692 | */ |
1683 | #define EXT4_DIR_PAD 4 | 1693 | #define EXT4_DIR_PAD 4 |
1684 | #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) | 1694 | #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) |
1685 | #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ | 1695 | #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ |
1686 | ~EXT4_DIR_ROUND) | 1696 | ~EXT4_DIR_ROUND) |
1687 | #define EXT4_MAX_REC_LEN ((1<<16)-1) | 1697 | #define EXT4_MAX_REC_LEN ((1<<16)-1) |
1688 | 1698 | ||
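/*
 * Worked example: a dirent needs 8 fixed bytes (inode + rec_len +
 * name_len + file_type) plus the name, rounded up to EXT4_DIR_PAD:
 *
 *	EXT4_DIR_REC_LEN(1)   == (1 + 8 + 3) & ~3   == 12
 *	EXT4_DIR_REC_LEN(5)   == (5 + 8 + 3) & ~3   == 16
 *	EXT4_DIR_REC_LEN(255) == (255 + 8 + 3) & ~3 == 264
 */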
1689 | /* | 1699 | /* |
1690 | * If we ever get support for fs block sizes > page_size, we'll need | 1700 | * If we ever get support for fs block sizes > page_size, we'll need |
1691 | * to remove the #if statements in the next two functions... | 1701 | * to remove the #if statements in the next two functions... |
1692 | */ | 1702 | */ |
1693 | static inline unsigned int | 1703 | static inline unsigned int |
1694 | ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | 1704 | ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) |
1695 | { | 1705 | { |
1696 | unsigned len = le16_to_cpu(dlen); | 1706 | unsigned len = le16_to_cpu(dlen); |
1697 | 1707 | ||
1698 | #if (PAGE_CACHE_SIZE >= 65536) | 1708 | #if (PAGE_CACHE_SIZE >= 65536) |
1699 | if (len == EXT4_MAX_REC_LEN || len == 0) | 1709 | if (len == EXT4_MAX_REC_LEN || len == 0) |
1700 | return blocksize; | 1710 | return blocksize; |
1701 | return (len & 65532) | ((len & 3) << 16); | 1711 | return (len & 65532) | ((len & 3) << 16); |
1702 | #else | 1712 | #else |
1703 | return len; | 1713 | return len; |
1704 | #endif | 1714 | #endif |
1705 | } | 1715 | } |
1706 | 1716 | ||
1707 | static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | 1717 | static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) |
1708 | { | 1718 | { |
1709 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | 1719 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) |
1710 | BUG(); | 1720 | BUG(); |
1711 | #if (PAGE_CACHE_SIZE >= 65536) | 1721 | #if (PAGE_CACHE_SIZE >= 65536) |
1712 | if (len < 65536) | 1722 | if (len < 65536) |
1713 | return cpu_to_le16(len); | 1723 | return cpu_to_le16(len); |
1714 | if (len == blocksize) { | 1724 | if (len == blocksize) { |
1715 | if (blocksize == 65536) | 1725 | if (blocksize == 65536) |
1716 | return cpu_to_le16(EXT4_MAX_REC_LEN); | 1726 | return cpu_to_le16(EXT4_MAX_REC_LEN); |
1717 | else | 1727 | else |
1718 | return cpu_to_le16(0); | 1728 | return cpu_to_le16(0); |
1719 | } | 1729 | } |
1720 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | 1730 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); |
1721 | #else | 1731 | #else |
1722 | return cpu_to_le16(len); | 1732 | return cpu_to_le16(len); |
1723 | #endif | 1733 | #endif |
1724 | } | 1734 | } |
1725 | 1735 | ||
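/*
 * Round-trip example on a PAGE_CACHE_SIZE >= 65536 build with 128KiB
 * blocks: rec_len is always a multiple of 4, so the low two bits are
 * free to carry bits 16-17 of the real length:
 *
 *	ext4_rec_len_to_disk(70000, 131072)  -> 4465
 *		(70000 & 65532) | ((70000 >> 16) & 3) == 4464 | 1
 *	ext4_rec_len_from_disk(4465, 131072) -> 70000
 *		(4465 & 65532) | ((4465 & 3) << 16)  == 4464 | 65536
 */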
1726 | /* | 1736 | /* |
1727 | * Hash Tree Directory indexing | 1737 | * Hash Tree Directory indexing |
1728 | * (c) Daniel Phillips, 2001 | 1738 | * (c) Daniel Phillips, 2001 |
1729 | */ | 1739 | */ |
1730 | 1740 | ||
1731 | #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ | 1741 | #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ |
1732 | EXT4_FEATURE_COMPAT_DIR_INDEX) && \ | 1742 | EXT4_FEATURE_COMPAT_DIR_INDEX) && \ |
1733 | ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) | 1743 | ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) |
1734 | #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) | 1744 | #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) |
1735 | #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) | 1745 | #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) |
1736 | 1746 | ||
1737 | /* Legal values for the dx_root hash_version field: */ | 1747 | /* Legal values for the dx_root hash_version field: */ |
1738 | 1748 | ||
1739 | #define DX_HASH_LEGACY 0 | 1749 | #define DX_HASH_LEGACY 0 |
1740 | #define DX_HASH_HALF_MD4 1 | 1750 | #define DX_HASH_HALF_MD4 1 |
1741 | #define DX_HASH_TEA 2 | 1751 | #define DX_HASH_TEA 2 |
1742 | #define DX_HASH_LEGACY_UNSIGNED 3 | 1752 | #define DX_HASH_LEGACY_UNSIGNED 3 |
1743 | #define DX_HASH_HALF_MD4_UNSIGNED 4 | 1753 | #define DX_HASH_HALF_MD4_UNSIGNED 4 |
1744 | #define DX_HASH_TEA_UNSIGNED 5 | 1754 | #define DX_HASH_TEA_UNSIGNED 5 |
1745 | 1755 | ||
1746 | static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, | 1756 | static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, |
1747 | const void *address, unsigned int length) | 1757 | const void *address, unsigned int length) |
1748 | { | 1758 | { |
1749 | struct { | 1759 | struct { |
1750 | struct shash_desc shash; | 1760 | struct shash_desc shash; |
1751 | char ctx[4]; | 1761 | char ctx[4]; |
1752 | } desc; | 1762 | } desc; |
1753 | int err; | 1763 | int err; |
1754 | 1764 | ||
1755 | BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); | 1765 | BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx)); |
1756 | 1766 | ||
1757 | desc.shash.tfm = sbi->s_chksum_driver; | 1767 | desc.shash.tfm = sbi->s_chksum_driver; |
1758 | desc.shash.flags = 0; | 1768 | desc.shash.flags = 0; |
1759 | *(u32 *)desc.ctx = crc; | 1769 | *(u32 *)desc.ctx = crc; |
1760 | 1770 | ||
1761 | err = crypto_shash_update(&desc.shash, address, length); | 1771 | err = crypto_shash_update(&desc.shash, address, length); |
1762 | BUG_ON(err); | 1772 | BUG_ON(err); |
1763 | 1773 | ||
1764 | return *(u32 *)desc.ctx; | 1774 | return *(u32 *)desc.ctx; |
1765 | } | 1775 | } |
1766 | 1776 | ||
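/*
 * Usage sketch: callers seed the crc32c with the precomputed UUID
 * checksum so blocks from different filesystems never verify against
 * each other, roughly:
 *
 *	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)buf, len);
 */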
1767 | #ifdef __KERNEL__ | 1777 | #ifdef __KERNEL__ |
1768 | 1778 | ||
1769 | /* hash info structure used by the directory hash */ | 1779 | /* hash info structure used by the directory hash */ |
1770 | struct dx_hash_info | 1780 | struct dx_hash_info |
1771 | { | 1781 | { |
1772 | u32 hash; | 1782 | u32 hash; |
1773 | u32 minor_hash; | 1783 | u32 minor_hash; |
1774 | int hash_version; | 1784 | int hash_version; |
1775 | u32 *seed; | 1785 | u32 *seed; |
1776 | }; | 1786 | }; |
1777 | 1787 | ||
1778 | 1788 | ||
1779 | /* 32 and 64 bit signed EOF for dx directories */ | 1789 | /* 32 and 64 bit signed EOF for dx directories */ |
1780 | #define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) | 1790 | #define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) |
1781 | #define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) | 1791 | #define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) |
1782 | 1792 | ||
1783 | 1793 | ||
1784 | /* | 1794 | /* |
1785 | * Control parameters used by ext4_htree_next_block | 1795 | * Control parameters used by ext4_htree_next_block |
1786 | */ | 1796 | */ |
1787 | #define HASH_NB_ALWAYS 1 | 1797 | #define HASH_NB_ALWAYS 1 |
1788 | 1798 | ||
1789 | 1799 | ||
1790 | /* | 1800 | /* |
1791 | * Describe an inode's exact location on disk and in memory | 1801 | * Describe an inode's exact location on disk and in memory |
1792 | */ | 1802 | */ |
1793 | struct ext4_iloc | 1803 | struct ext4_iloc |
1794 | { | 1804 | { |
1795 | struct buffer_head *bh; | 1805 | struct buffer_head *bh; |
1796 | unsigned long offset; | 1806 | unsigned long offset; |
1797 | ext4_group_t block_group; | 1807 | ext4_group_t block_group; |
1798 | }; | 1808 | }; |
1799 | 1809 | ||
1800 | static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) | 1810 | static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) |
1801 | { | 1811 | { |
1802 | return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); | 1812 | return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); |
1803 | } | 1813 | } |
1804 | 1814 | ||
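/*
 * Usage sketch (simplified from inode.c): an iloc is filled in by
 * ext4_get_inode_loc() and then mapped straight onto the buffer:
 *
 *	struct ext4_iloc iloc;
 *	struct ext4_inode *raw;
 *
 *	err = ext4_get_inode_loc(inode, &iloc);
 *	if (!err)
 *		raw = ext4_raw_inode(&iloc);	-- points into iloc.bh
 */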
1805 | /* | 1815 | /* |
1806 | * This structure is stuffed into the struct file's private_data field | 1816 | * This structure is stuffed into the struct file's private_data field |
1807 | * for directories. It is where we put information so that we can do | 1817 | * for directories. It is where we put information so that we can do |
1808 | * readdir operations in hash tree order. | 1818 | * readdir operations in hash tree order. |
1809 | */ | 1819 | */ |
1810 | struct dir_private_info { | 1820 | struct dir_private_info { |
1811 | struct rb_root root; | 1821 | struct rb_root root; |
1812 | struct rb_node *curr_node; | 1822 | struct rb_node *curr_node; |
1813 | struct fname *extra_fname; | 1823 | struct fname *extra_fname; |
1814 | loff_t last_pos; | 1824 | loff_t last_pos; |
1815 | __u32 curr_hash; | 1825 | __u32 curr_hash; |
1816 | __u32 curr_minor_hash; | 1826 | __u32 curr_minor_hash; |
1817 | __u32 next_hash; | 1827 | __u32 next_hash; |
1818 | }; | 1828 | }; |
1819 | 1829 | ||
1820 | /* calculate the first block number of the group */ | 1830 | /* calculate the first block number of the group */ |
1821 | static inline ext4_fsblk_t | 1831 | static inline ext4_fsblk_t |
1822 | ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | 1832 | ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) |
1823 | { | 1833 | { |
1824 | return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + | 1834 | return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + |
1825 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 1835 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
1826 | } | 1836 | } |
1827 | 1837 | ||
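/*
 * Worked example: with 4KiB blocks (32768 blocks per group,
 * s_first_data_block == 0) group 3 starts at block 3 * 32768 = 98304;
 * with 1KiB blocks s_first_data_block is 1, so every group shifts up
 * by one block and group 0 starts at block 1.
 */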
1828 | /* | 1838 | /* |
1829 | * Special error return code only used by dx_probe() and its callers. | 1839 | * Special error return code only used by dx_probe() and its callers. |
1830 | */ | 1840 | */ |
1831 | #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1)) | 1841 | #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1)) |
1832 | 1842 | ||
1833 | /* | 1843 | /* |
1834 | * Timeout and state flag for lazy initialization inode thread. | 1844 | * Timeout and state flag for lazy initialization inode thread. |
1835 | */ | 1845 | */ |
1836 | #define EXT4_DEF_LI_WAIT_MULT 10 | 1846 | #define EXT4_DEF_LI_WAIT_MULT 10 |
1837 | #define EXT4_DEF_LI_MAX_START_DELAY 5 | 1847 | #define EXT4_DEF_LI_MAX_START_DELAY 5 |
1838 | #define EXT4_LAZYINIT_QUIT 0x0001 | 1848 | #define EXT4_LAZYINIT_QUIT 0x0001 |
1839 | #define EXT4_LAZYINIT_RUNNING 0x0002 | 1849 | #define EXT4_LAZYINIT_RUNNING 0x0002 |
1840 | 1850 | ||
1841 | /* | 1851 | /* |
1842 | * Lazy inode table initialization info | 1852 | * Lazy inode table initialization info |
1843 | */ | 1853 | */ |
1844 | struct ext4_lazy_init { | 1854 | struct ext4_lazy_init { |
1845 | unsigned long li_state; | 1855 | unsigned long li_state; |
1846 | struct list_head li_request_list; | 1856 | struct list_head li_request_list; |
1847 | struct mutex li_list_mtx; | 1857 | struct mutex li_list_mtx; |
1848 | }; | 1858 | }; |
1849 | 1859 | ||
1850 | struct ext4_li_request { | 1860 | struct ext4_li_request { |
1851 | struct super_block *lr_super; | 1861 | struct super_block *lr_super; |
1852 | struct ext4_sb_info *lr_sbi; | 1862 | struct ext4_sb_info *lr_sbi; |
1853 | ext4_group_t lr_next_group; | 1863 | ext4_group_t lr_next_group; |
1854 | struct list_head lr_request; | 1864 | struct list_head lr_request; |
1855 | unsigned long lr_next_sched; | 1865 | unsigned long lr_next_sched; |
1856 | unsigned long lr_timeout; | 1866 | unsigned long lr_timeout; |
1857 | }; | 1867 | }; |
1858 | 1868 | ||
1859 | struct ext4_features { | 1869 | struct ext4_features { |
1860 | struct kobject f_kobj; | 1870 | struct kobject f_kobj; |
1861 | struct completion f_kobj_unregister; | 1871 | struct completion f_kobj_unregister; |
1862 | }; | 1872 | }; |
1863 | 1873 | ||
1864 | /* | 1874 | /* |
1865 | * This structure will be used for multiple mount protection. It will be | 1875 | * This structure will be used for multiple mount protection. It will be |
1866 | * written into the block number saved in the s_mmp_block field in the | 1876 | * written into the block number saved in the s_mmp_block field in the |
1867 | * superblock. Programs that check MMP should assume that if | 1877 | * superblock. Programs that check MMP should assume that if |
1868 | * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe | 1878 | * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe |
1869 | * to use the filesystem, regardless of how old the timestamp is. | 1879 | * to use the filesystem, regardless of how old the timestamp is. |
1870 | */ | 1880 | */ |
1871 | #define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ | 1881 | #define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ |
1872 | #define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ | 1882 | #define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ |
1873 | #define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ | 1883 | #define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ |
1874 | #define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ | 1884 | #define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ |
1875 | 1885 | ||
1876 | struct mmp_struct { | 1886 | struct mmp_struct { |
1877 | __le32 mmp_magic; /* Magic number for MMP */ | 1887 | __le32 mmp_magic; /* Magic number for MMP */ |
1878 | __le32 mmp_seq; /* Sequence no. updated periodically */ | 1888 | __le32 mmp_seq; /* Sequence no. updated periodically */ |
1879 | 1889 | ||
1880 | /* | 1890 | /* |
1881 | * mmp_time, mmp_nodename & mmp_bdevname are only used for information | 1891 | * mmp_time, mmp_nodename & mmp_bdevname are only used for information |
1882 | * purposes and do not affect the correctness of the algorithm | 1892 | * purposes and do not affect the correctness of the algorithm |
1883 | */ | 1893 | */ |
1884 | __le64 mmp_time; /* Time last updated */ | 1894 | __le64 mmp_time; /* Time last updated */ |
1885 | char mmp_nodename[64]; /* Node which last updated MMP block */ | 1895 | char mmp_nodename[64]; /* Node which last updated MMP block */ |
1886 | char mmp_bdevname[32]; /* Bdev which last updated MMP block */ | 1896 | char mmp_bdevname[32]; /* Bdev which last updated MMP block */ |
1887 | 1897 | ||
1888 | /* | 1898 | /* |
1889 | * mmp_check_interval is used to verify if the MMP block has been | 1899 | * mmp_check_interval is used to verify if the MMP block has been |
1890 | * updated on the block device. The value is updated based on the | 1900 | * updated on the block device. The value is updated based on the |
1891 | * maximum time to write the MMP block during an update cycle. | 1901 | * maximum time to write the MMP block during an update cycle. |
1892 | */ | 1902 | */ |
1893 | __le16 mmp_check_interval; | 1903 | __le16 mmp_check_interval; |
1894 | 1904 | ||
1895 | __le16 mmp_pad1; | 1905 | __le16 mmp_pad1; |
1896 | __le32 mmp_pad2[226]; | 1906 | __le32 mmp_pad2[226]; |
1897 | __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ | 1907 | __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ |
1898 | }; | 1908 | }; |
1899 | 1909 | ||
1900 | /* arguments passed to the mmp thread */ | 1910 | /* arguments passed to the mmp thread */ |
1901 | struct mmpd_data { | 1911 | struct mmpd_data { |
1902 | struct buffer_head *bh; /* bh from initial read_mmp_block() */ | 1912 | struct buffer_head *bh; /* bh from initial read_mmp_block() */ |
1903 | struct super_block *sb; /* super block of the fs */ | 1913 | struct super_block *sb; /* super block of the fs */ |
1904 | }; | 1914 | }; |
1905 | 1915 | ||
1906 | /* | 1916 | /* |
1907 | * Check interval multiplier | 1917 | * Check interval multiplier |
1908 | * The MMP block is written every update interval and initially checked every | 1918 | * The MMP block is written every update interval and initially checked every |
1909 | * update interval x the multiplier (the value is then adapted based on the | 1919 | * update interval x the multiplier (the value is then adapted based on the |
1910 | * write latency). The reason is that writes can be delayed under load and we | 1920 | * write latency). The reason is that writes can be delayed under load and we |
1911 | * don't want readers to incorrectly assume that the filesystem is no longer | 1921 | * don't want readers to incorrectly assume that the filesystem is no longer |
1912 | * in use. | 1922 | * in use. |
1913 | */ | 1923 | */ |
1914 | #define EXT4_MMP_CHECK_MULT 2UL | 1924 | #define EXT4_MMP_CHECK_MULT 2UL |
1915 | 1925 | ||
1916 | /* | 1926 | /* |
1917 | * Minimum interval for MMP checking in seconds. | 1927 | * Minimum interval for MMP checking in seconds. |
1918 | */ | 1928 | */ |
1919 | #define EXT4_MMP_MIN_CHECK_INTERVAL 5UL | 1929 | #define EXT4_MMP_MIN_CHECK_INTERVAL 5UL |
1920 | 1930 | ||
1921 | /* | 1931 | /* |
1922 | * Maximum interval for MMP checking in seconds. | 1932 | * Maximum interval for MMP checking in seconds. |
1923 | */ | 1933 | */ |
1924 | #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL | 1934 | #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL |
1925 | 1935 | ||
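/*
 * Sketch of how the bounds and multiplier above combine (simplified;
 * kmmpd in mmp.c also adapts the interval to observed write latency):
 *
 *	check = mmp_update_interval * EXT4_MMP_CHECK_MULT;
 *	check = max(check, EXT4_MMP_MIN_CHECK_INTERVAL);
 *	check = min(check, EXT4_MMP_MAX_CHECK_INTERVAL);
 */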
1926 | /* | 1936 | /* |
1927 | * Function prototypes | 1937 | * Function prototypes |
1928 | */ | 1938 | */ |
1929 | 1939 | ||
1930 | /* | 1940 | /* |
1931 | * Ok, these declarations are also in <linux/kernel.h> but none of the | 1941 | * Ok, these declarations are also in <linux/kernel.h> but none of the |
1932 | * ext4 source programs needs to include it so they are duplicated here. | 1942 | * ext4 source programs needs to include it so they are duplicated here. |
1933 | */ | 1943 | */ |
1934 | # define NORET_TYPE /**/ | 1944 | # define NORET_TYPE /**/ |
1935 | # define ATTRIB_NORET __attribute__((noreturn)) | 1945 | # define ATTRIB_NORET __attribute__((noreturn)) |
1936 | # define NORET_AND noreturn, | 1946 | # define NORET_AND noreturn, |
1937 | 1947 | ||
1938 | /* bitmap.c */ | 1948 | /* bitmap.c */ |
1939 | extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); | 1949 | extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); |
1940 | void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | 1950 | void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, |
1941 | struct ext4_group_desc *gdp, | 1951 | struct ext4_group_desc *gdp, |
1942 | struct buffer_head *bh, int sz); | 1952 | struct buffer_head *bh, int sz); |
1943 | int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | 1953 | int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, |
1944 | struct ext4_group_desc *gdp, | 1954 | struct ext4_group_desc *gdp, |
1945 | struct buffer_head *bh, int sz); | 1955 | struct buffer_head *bh, int sz); |
1946 | void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | 1956 | void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, |
1947 | struct ext4_group_desc *gdp, | 1957 | struct ext4_group_desc *gdp, |
1948 | struct buffer_head *bh); | 1958 | struct buffer_head *bh); |
1949 | int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | 1959 | int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, |
1950 | struct ext4_group_desc *gdp, | 1960 | struct ext4_group_desc *gdp, |
1951 | struct buffer_head *bh); | 1961 | struct buffer_head *bh); |
1952 | 1962 | ||
1953 | /* balloc.c */ | 1963 | /* balloc.c */ |
1954 | extern void ext4_get_group_no_and_offset(struct super_block *sb, | 1964 | extern void ext4_get_group_no_and_offset(struct super_block *sb, |
1955 | ext4_fsblk_t blocknr, | 1965 | ext4_fsblk_t blocknr, |
1956 | ext4_group_t *blockgrpp, | 1966 | ext4_group_t *blockgrpp, |
1957 | ext4_grpblk_t *offsetp); | 1967 | ext4_grpblk_t *offsetp); |
1958 | extern ext4_group_t ext4_get_group_number(struct super_block *sb, | 1968 | extern ext4_group_t ext4_get_group_number(struct super_block *sb, |
1959 | ext4_fsblk_t block); | 1969 | ext4_fsblk_t block); |
1960 | 1970 | ||
1961 | extern unsigned int ext4_block_group(struct super_block *sb, | 1971 | extern unsigned int ext4_block_group(struct super_block *sb, |
1962 | ext4_fsblk_t blocknr); | 1972 | ext4_fsblk_t blocknr); |
1963 | extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, | 1973 | extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, |
1964 | ext4_fsblk_t blocknr); | 1974 | ext4_fsblk_t blocknr); |
1965 | extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); | 1975 | extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); |
1966 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | 1976 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, |
1967 | ext4_group_t group); | 1977 | ext4_group_t group); |
1968 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | 1978 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
1969 | ext4_fsblk_t goal, | 1979 | ext4_fsblk_t goal, |
1970 | unsigned int flags, | 1980 | unsigned int flags, |
1971 | unsigned long *count, | 1981 | unsigned long *count, |
1972 | int *errp); | 1982 | int *errp); |
1973 | extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi, | 1983 | extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi, |
1974 | s64 nclusters, unsigned int flags); | 1984 | s64 nclusters, unsigned int flags); |
1975 | extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *); | 1985 | extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *); |
1976 | extern void ext4_check_blocks_bitmap(struct super_block *); | 1986 | extern void ext4_check_blocks_bitmap(struct super_block *); |
1977 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1987 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
1978 | ext4_group_t block_group, | 1988 | ext4_group_t block_group, |
1979 | struct buffer_head ** bh); | 1989 | struct buffer_head ** bh); |
1980 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1990 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
1981 | 1991 | ||
1982 | extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, | 1992 | extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, |
1983 | ext4_group_t block_group); | 1993 | ext4_group_t block_group); |
1984 | extern int ext4_wait_block_bitmap(struct super_block *sb, | 1994 | extern int ext4_wait_block_bitmap(struct super_block *sb, |
1985 | ext4_group_t block_group, | 1995 | ext4_group_t block_group, |
1986 | struct buffer_head *bh); | 1996 | struct buffer_head *bh); |
1987 | extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, | 1997 | extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, |
1988 | ext4_group_t block_group); | 1998 | ext4_group_t block_group); |
1989 | extern unsigned ext4_free_clusters_after_init(struct super_block *sb, | 1999 | extern unsigned ext4_free_clusters_after_init(struct super_block *sb, |
1990 | ext4_group_t block_group, | 2000 | ext4_group_t block_group, |
1991 | struct ext4_group_desc *gdp); | 2001 | struct ext4_group_desc *gdp); |
1992 | ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); | 2002 | ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); |
1993 | 2003 | ||
1994 | /* dir.c */ | 2004 | /* dir.c */ |
1995 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, | 2005 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, |
1996 | struct file *, | 2006 | struct file *, |
1997 | struct ext4_dir_entry_2 *, | 2007 | struct ext4_dir_entry_2 *, |
1998 | struct buffer_head *, char *, int, | 2008 | struct buffer_head *, char *, int, |
1999 | unsigned int); | 2009 | unsigned int); |
2000 | #define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \ | 2010 | #define ext4_check_dir_entry(dir, filp, de, bh, buf, size, offset) \ |
2001 | unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ | 2011 | unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ |
2002 | (de), (bh), (buf), (size), (offset))) | 2012 | (de), (bh), (buf), (size), (offset))) |
2003 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | 2013 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, |
2004 | __u32 minor_hash, | 2014 | __u32 minor_hash, |
2005 | struct ext4_dir_entry_2 *dirent); | 2015 | struct ext4_dir_entry_2 *dirent); |
2006 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 2016 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
2007 | extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, | 2017 | extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, |
2008 | struct buffer_head *bh, | 2018 | struct buffer_head *bh, |
2009 | void *buf, int buf_size, | 2019 | void *buf, int buf_size, |
2010 | const char *name, int namelen, | 2020 | const char *name, int namelen, |
2011 | struct ext4_dir_entry_2 **dest_de); | 2021 | struct ext4_dir_entry_2 **dest_de); |
2012 | void ext4_insert_dentry(struct inode *inode, | 2022 | void ext4_insert_dentry(struct inode *inode, |
2013 | struct ext4_dir_entry_2 *de, | 2023 | struct ext4_dir_entry_2 *de, |
2014 | int buf_size, | 2024 | int buf_size, |
2015 | const char *name, int namelen); | 2025 | const char *name, int namelen); |
2016 | static inline void ext4_update_dx_flag(struct inode *inode) | 2026 | static inline void ext4_update_dx_flag(struct inode *inode) |
2017 | { | 2027 | { |
2018 | if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | 2028 | if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, |
2019 | EXT4_FEATURE_COMPAT_DIR_INDEX)) | 2029 | EXT4_FEATURE_COMPAT_DIR_INDEX)) |
2020 | ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); | 2030 | ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); |
2021 | } | 2031 | } |
2022 | static unsigned char ext4_filetype_table[] = { | 2032 | static unsigned char ext4_filetype_table[] = { |
2023 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 2033 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
2024 | }; | 2034 | }; |
2025 | 2035 | ||
2026 | static inline unsigned char get_dtype(struct super_block *sb, int filetype) | 2036 | static inline unsigned char get_dtype(struct super_block *sb, int filetype) |
2027 | { | 2037 | { |
2028 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || | 2038 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || |
2029 | (filetype >= EXT4_FT_MAX)) | 2039 | (filetype >= EXT4_FT_MAX)) |
2030 | return DT_UNKNOWN; | 2040 | return DT_UNKNOWN; |
2031 | 2041 | ||
2032 | return ext4_filetype_table[filetype]; | 2042 | return ext4_filetype_table[filetype]; |
2033 | } | 2043 | } |
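get_dtype() above is a bounds-checked table lookup: unless the filesystem was created with the filetype feature, and the on-disk value is in range, it reports DT_UNKNOWN rather than trusting the directory entry. A minimal userspace sketch of the same pattern, using glibc's DT_* constants and a stand-in FT_MAX (the real limit is EXT4_FT_MAX in this header):

    #define _DEFAULT_SOURCE   /* for DT_* in <dirent.h> */
    #include <dirent.h>
    #include <stdio.h>

    #define FT_MAX 8          /* stand-in for EXT4_FT_MAX */

    static const unsigned char filetype_table[FT_MAX] = {
        DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
    };

    /* has_filetype models the INCOMPAT_FILETYPE feature test. */
    static unsigned char dtype(int has_filetype, int filetype)
    {
        if (!has_filetype || filetype < 0 || filetype >= FT_MAX)
            return DT_UNKNOWN;
        return filetype_table[filetype];
    }

    int main(void)
    {
        printf("%d %d\n", dtype(1, 2), dtype(0, 2));  /* 4 (DT_DIR), then 0 */
        return 0;
    }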
2034 | extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, | 2044 | extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, |
2035 | void *buf, int buf_size); | 2045 | void *buf, int buf_size); |
2036 | 2046 | ||
2037 | /* fsync.c */ | 2047 | /* fsync.c */ |
2038 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); | 2048 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); |
2039 | 2049 | ||
2040 | /* hash.c */ | 2050 | /* hash.c */ |
2041 | extern int ext4fs_dirhash(const char *name, int len, struct | 2051 | extern int ext4fs_dirhash(const char *name, int len, struct |
2042 | dx_hash_info *hinfo); | 2052 | dx_hash_info *hinfo); |
2043 | 2053 | ||
2044 | /* ialloc.c */ | 2054 | /* ialloc.c */ |
2045 | extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t, | 2055 | extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t, |
2046 | const struct qstr *qstr, __u32 goal, | 2056 | const struct qstr *qstr, __u32 goal, |
2047 | uid_t *owner, int handle_type, | 2057 | uid_t *owner, int handle_type, |
2048 | unsigned int line_no, int nblocks); | 2058 | unsigned int line_no, int nblocks); |
2049 | 2059 | ||
2050 | #define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \ | 2060 | #define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \ |
2051 | __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \ | 2061 | __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \ |
2052 | 0, 0, 0) | 2062 | 0, 0, 0) |
2053 | #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \ | 2063 | #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \ |
2054 | type, nblocks) \ | 2064 | type, nblocks) \ |
2055 | __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ | 2065 | __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ |
2056 | (type), __LINE__, (nblocks)) | 2066 | (type), __LINE__, (nblocks)) |
2057 | 2067 | ||
2058 | 2068 | ||
2059 | extern void ext4_free_inode(handle_t *, struct inode *); | 2069 | extern void ext4_free_inode(handle_t *, struct inode *); |
2060 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); | 2070 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
2061 | extern unsigned long ext4_count_free_inodes(struct super_block *); | 2071 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
2062 | extern unsigned long ext4_count_dirs(struct super_block *); | 2072 | extern unsigned long ext4_count_dirs(struct super_block *); |
2063 | extern void ext4_check_inodes_bitmap(struct super_block *); | 2073 | extern void ext4_check_inodes_bitmap(struct super_block *); |
2064 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); | 2074 | extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); |
2065 | extern int ext4_init_inode_table(struct super_block *sb, | 2075 | extern int ext4_init_inode_table(struct super_block *sb, |
2066 | ext4_group_t group, int barrier); | 2076 | ext4_group_t group, int barrier); |
2067 | extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); | 2077 | extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); |
2068 | 2078 | ||
2069 | /* mballoc.c */ | 2079 | /* mballoc.c */ |
2070 | extern long ext4_mb_stats; | 2080 | extern long ext4_mb_stats; |
2071 | extern long ext4_mb_max_to_scan; | 2081 | extern long ext4_mb_max_to_scan; |
2072 | extern int ext4_mb_init(struct super_block *); | 2082 | extern int ext4_mb_init(struct super_block *); |
2073 | extern int ext4_mb_release(struct super_block *); | 2083 | extern int ext4_mb_release(struct super_block *); |
2074 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 2084 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
2075 | struct ext4_allocation_request *, int *); | 2085 | struct ext4_allocation_request *, int *); |
2076 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 2086 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
2077 | extern void ext4_discard_preallocations(struct inode *); | 2087 | extern void ext4_discard_preallocations(struct inode *); |
2078 | extern int __init ext4_init_mballoc(void); | 2088 | extern int __init ext4_init_mballoc(void); |
2079 | extern void ext4_exit_mballoc(void); | 2089 | extern void ext4_exit_mballoc(void); |
2080 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 2090 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
2081 | struct buffer_head *bh, ext4_fsblk_t block, | 2091 | struct buffer_head *bh, ext4_fsblk_t block, |
2082 | unsigned long count, int flags); | 2092 | unsigned long count, int flags); |
2083 | extern int ext4_mb_alloc_groupinfo(struct super_block *sb, | 2093 | extern int ext4_mb_alloc_groupinfo(struct super_block *sb, |
2084 | ext4_group_t ngroups); | 2094 | ext4_group_t ngroups); |
2085 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 2095 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
2086 | ext4_group_t i, struct ext4_group_desc *desc); | 2096 | ext4_group_t i, struct ext4_group_desc *desc); |
2087 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | 2097 | extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, |
2088 | ext4_fsblk_t block, unsigned long count); | 2098 | ext4_fsblk_t block, unsigned long count); |
2089 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); | 2099 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
2090 | 2100 | ||
2091 | /* inode.c */ | 2101 | /* inode.c */ |
2092 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); | 2102 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); |
2093 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); | 2103 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); |
2094 | int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2104 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
2095 | struct buffer_head *bh_result, int create); | 2105 | struct buffer_head *bh_result, int create); |
2096 | int ext4_get_block(struct inode *inode, sector_t iblock, | 2106 | int ext4_get_block(struct inode *inode, sector_t iblock, |
2097 | struct buffer_head *bh_result, int create); | 2107 | struct buffer_head *bh_result, int create); |
2098 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2108 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2099 | struct buffer_head *bh, int create); | 2109 | struct buffer_head *bh, int create); |
2100 | int ext4_walk_page_buffers(handle_t *handle, | 2110 | int ext4_walk_page_buffers(handle_t *handle, |
2101 | struct buffer_head *head, | 2111 | struct buffer_head *head, |
2102 | unsigned from, | 2112 | unsigned from, |
2103 | unsigned to, | 2113 | unsigned to, |
2104 | int *partial, | 2114 | int *partial, |
2105 | int (*fn)(handle_t *handle, | 2115 | int (*fn)(handle_t *handle, |
2106 | struct buffer_head *bh)); | 2116 | struct buffer_head *bh)); |
2107 | int do_journal_get_write_access(handle_t *handle, | 2117 | int do_journal_get_write_access(handle_t *handle, |
2108 | struct buffer_head *bh); | 2118 | struct buffer_head *bh); |
2109 | #define FALL_BACK_TO_NONDELALLOC 1 | 2119 | #define FALL_BACK_TO_NONDELALLOC 1 |
2110 | #define CONVERT_INLINE_DATA 2 | 2120 | #define CONVERT_INLINE_DATA 2 |
2111 | 2121 | ||
2112 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 2122 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
2113 | extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); | 2123 | extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); |
2114 | extern int ext4_write_inode(struct inode *, struct writeback_control *); | 2124 | extern int ext4_write_inode(struct inode *, struct writeback_control *); |
2115 | extern int ext4_setattr(struct dentry *, struct iattr *); | 2125 | extern int ext4_setattr(struct dentry *, struct iattr *); |
2116 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 2126 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
2117 | struct kstat *stat); | 2127 | struct kstat *stat); |
2118 | extern void ext4_evict_inode(struct inode *); | 2128 | extern void ext4_evict_inode(struct inode *); |
2119 | extern void ext4_clear_inode(struct inode *); | 2129 | extern void ext4_clear_inode(struct inode *); |
2120 | extern int ext4_sync_inode(handle_t *, struct inode *); | 2130 | extern int ext4_sync_inode(handle_t *, struct inode *); |
2121 | extern void ext4_dirty_inode(struct inode *, int); | 2131 | extern void ext4_dirty_inode(struct inode *, int); |
2122 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 2132 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
2123 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 2133 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
2124 | extern int ext4_inode_attach_jinode(struct inode *inode); | 2134 | extern int ext4_inode_attach_jinode(struct inode *inode); |
2125 | extern int ext4_can_truncate(struct inode *inode); | 2135 | extern int ext4_can_truncate(struct inode *inode); |
2126 | extern void ext4_truncate(struct inode *); | 2136 | extern void ext4_truncate(struct inode *); |
2127 | extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); | 2137 | extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); |
2128 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | 2138 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); |
2129 | extern void ext4_set_inode_flags(struct inode *); | 2139 | extern void ext4_set_inode_flags(struct inode *); |
2130 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 2140 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
2131 | extern int ext4_alloc_da_blocks(struct inode *inode); | 2141 | extern int ext4_alloc_da_blocks(struct inode *inode); |
2132 | extern void ext4_set_aops(struct inode *inode); | 2142 | extern void ext4_set_aops(struct inode *inode); |
2133 | extern int ext4_writepage_trans_blocks(struct inode *); | 2143 | extern int ext4_writepage_trans_blocks(struct inode *); |
2134 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 2144 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
2135 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 2145 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
2136 | loff_t lstart, loff_t lend); | 2146 | loff_t lstart, loff_t lend); |
2137 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2147 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2138 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 2148 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
2139 | extern void ext4_da_update_reserve_space(struct inode *inode, | 2149 | extern void ext4_da_update_reserve_space(struct inode *inode, |
2140 | int used, int quota_claim); | 2150 | int used, int quota_claim); |
2141 | 2151 | ||
2142 | /* indirect.c */ | 2152 | /* indirect.c */ |
2143 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 2153 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
2144 | struct ext4_map_blocks *map, int flags); | 2154 | struct ext4_map_blocks *map, int flags); |
2145 | extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | 2155 | extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, |
2146 | struct iov_iter *iter, loff_t offset); | 2156 | struct iov_iter *iter, loff_t offset); |
2147 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | 2157 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); |
2148 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); | 2158 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); |
2149 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); | 2159 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); |
2150 | extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode, | 2160 | extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode, |
2151 | ext4_lblk_t start, ext4_lblk_t end); | 2161 | ext4_lblk_t start, ext4_lblk_t end); |
2152 | 2162 | ||
2153 | /* ioctl.c */ | 2163 | /* ioctl.c */ |
2154 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 2164 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
2155 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | 2165 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
2156 | 2166 | ||
2157 | /* migrate.c */ | 2167 | /* migrate.c */ |
2158 | extern int ext4_ext_migrate(struct inode *); | 2168 | extern int ext4_ext_migrate(struct inode *); |
2159 | extern int ext4_ind_migrate(struct inode *inode); | 2169 | extern int ext4_ind_migrate(struct inode *inode); |
2160 | 2170 | ||
2161 | /* namei.c */ | 2171 | /* namei.c */ |
2162 | extern int ext4_dirent_csum_verify(struct inode *inode, | 2172 | extern int ext4_dirent_csum_verify(struct inode *inode, |
2163 | struct ext4_dir_entry *dirent); | 2173 | struct ext4_dir_entry *dirent); |
2164 | extern int ext4_orphan_add(handle_t *, struct inode *); | 2174 | extern int ext4_orphan_add(handle_t *, struct inode *); |
2165 | extern int ext4_orphan_del(handle_t *, struct inode *); | 2175 | extern int ext4_orphan_del(handle_t *, struct inode *); |
2166 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | 2176 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, |
2167 | __u32 start_minor_hash, __u32 *next_hash); | 2177 | __u32 start_minor_hash, __u32 *next_hash); |
2168 | extern int search_dir(struct buffer_head *bh, | 2178 | extern int search_dir(struct buffer_head *bh, |
2169 | char *search_buf, | 2179 | char *search_buf, |
2170 | int buf_size, | 2180 | int buf_size, |
2171 | struct inode *dir, | 2181 | struct inode *dir, |
2172 | const struct qstr *d_name, | 2182 | const struct qstr *d_name, |
2173 | unsigned int offset, | 2183 | unsigned int offset, |
2174 | struct ext4_dir_entry_2 **res_dir); | 2184 | struct ext4_dir_entry_2 **res_dir); |
2175 | extern int ext4_generic_delete_entry(handle_t *handle, | 2185 | extern int ext4_generic_delete_entry(handle_t *handle, |
2176 | struct inode *dir, | 2186 | struct inode *dir, |
2177 | struct ext4_dir_entry_2 *de_del, | 2187 | struct ext4_dir_entry_2 *de_del, |
2178 | struct buffer_head *bh, | 2188 | struct buffer_head *bh, |
2179 | void *entry_buf, | 2189 | void *entry_buf, |
2180 | int buf_size, | 2190 | int buf_size, |
2181 | int csum_size); | 2191 | int csum_size); |
2182 | 2192 | ||
2183 | /* resize.c */ | 2193 | /* resize.c */ |
2184 | extern int ext4_group_add(struct super_block *sb, | 2194 | extern int ext4_group_add(struct super_block *sb, |
2185 | struct ext4_new_group_data *input); | 2195 | struct ext4_new_group_data *input); |
2186 | extern int ext4_group_extend(struct super_block *sb, | 2196 | extern int ext4_group_extend(struct super_block *sb, |
2187 | struct ext4_super_block *es, | 2197 | struct ext4_super_block *es, |
2188 | ext4_fsblk_t n_blocks_count); | 2198 | ext4_fsblk_t n_blocks_count); |
2189 | extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); | 2199 | extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); |
2190 | 2200 | ||
2191 | /* super.c */ | 2201 | /* super.c */ |
2192 | extern int ext4_calculate_overhead(struct super_block *sb); | 2202 | extern int ext4_calculate_overhead(struct super_block *sb); |
2193 | extern void ext4_superblock_csum_set(struct super_block *sb); | 2203 | extern void ext4_superblock_csum_set(struct super_block *sb); |
2194 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 2204 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
2195 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | 2205 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); |
2196 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, | 2206 | extern int ext4_alloc_flex_bg_array(struct super_block *sb, |
2197 | ext4_group_t ngroup); | 2207 | ext4_group_t ngroup); |
2198 | extern const char *ext4_decode_error(struct super_block *sb, int errno, | 2208 | extern const char *ext4_decode_error(struct super_block *sb, int errno, |
2199 | char nbuf[16]); | 2209 | char nbuf[16]); |
2200 | 2210 | ||
2201 | extern __printf(4, 5) | 2211 | extern __printf(4, 5) |
2202 | void __ext4_error(struct super_block *, const char *, unsigned int, | 2212 | void __ext4_error(struct super_block *, const char *, unsigned int, |
2203 | const char *, ...); | 2213 | const char *, ...); |
2204 | extern __printf(5, 6) | 2214 | extern __printf(5, 6) |
2205 | void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, | 2215 | void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, |
2206 | const char *, ...); | 2216 | const char *, ...); |
2207 | extern __printf(5, 6) | 2217 | extern __printf(5, 6) |
2208 | void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, | 2218 | void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, |
2209 | const char *, ...); | 2219 | const char *, ...); |
2210 | extern void __ext4_std_error(struct super_block *, const char *, | 2220 | extern void __ext4_std_error(struct super_block *, const char *, |
2211 | unsigned int, int); | 2221 | unsigned int, int); |
2212 | extern __printf(4, 5) | 2222 | extern __printf(4, 5) |
2213 | void __ext4_abort(struct super_block *, const char *, unsigned int, | 2223 | void __ext4_abort(struct super_block *, const char *, unsigned int, |
2214 | const char *, ...); | 2224 | const char *, ...); |
2215 | extern __printf(4, 5) | 2225 | extern __printf(4, 5) |
2216 | void __ext4_warning(struct super_block *, const char *, unsigned int, | 2226 | void __ext4_warning(struct super_block *, const char *, unsigned int, |
2217 | const char *, ...); | 2227 | const char *, ...); |
2218 | extern __printf(3, 4) | 2228 | extern __printf(3, 4) |
2219 | void __ext4_msg(struct super_block *, const char *, const char *, ...); | 2229 | void __ext4_msg(struct super_block *, const char *, const char *, ...); |
2220 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, | 2230 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, |
2221 | const char *, unsigned int, const char *); | 2231 | const char *, unsigned int, const char *); |
2222 | extern __printf(7, 8) | 2232 | extern __printf(7, 8) |
2223 | void __ext4_grp_locked_error(const char *, unsigned int, | 2233 | void __ext4_grp_locked_error(const char *, unsigned int, |
2224 | struct super_block *, ext4_group_t, | 2234 | struct super_block *, ext4_group_t, |
2225 | unsigned long, ext4_fsblk_t, | 2235 | unsigned long, ext4_fsblk_t, |
2226 | const char *, ...); | 2236 | const char *, ...); |
2227 | 2237 | ||
2228 | #ifdef CONFIG_PRINTK | 2238 | #ifdef CONFIG_PRINTK |
2229 | 2239 | ||
2230 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ | 2240 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ |
2231 | __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__) | 2241 | __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__) |
2232 | #define ext4_error_file(file, func, line, block, fmt, ...) \ | 2242 | #define ext4_error_file(file, func, line, block, fmt, ...) \ |
2233 | __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) | 2243 | __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) |
2234 | #define ext4_error(sb, fmt, ...) \ | 2244 | #define ext4_error(sb, fmt, ...) \ |
2235 | __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | 2245 | __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) |
2236 | #define ext4_abort(sb, fmt, ...) \ | 2246 | #define ext4_abort(sb, fmt, ...) \ |
2237 | __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | 2247 | __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) |
2238 | #define ext4_warning(sb, fmt, ...) \ | 2248 | #define ext4_warning(sb, fmt, ...) \ |
2239 | __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | 2249 | __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) |
2240 | #define ext4_msg(sb, level, fmt, ...) \ | 2250 | #define ext4_msg(sb, level, fmt, ...) \ |
2241 | __ext4_msg(sb, level, fmt, ##__VA_ARGS__) | 2251 | __ext4_msg(sb, level, fmt, ##__VA_ARGS__) |
2242 | #define dump_mmp_msg(sb, mmp, msg) \ | 2252 | #define dump_mmp_msg(sb, mmp, msg) \ |
2243 | __dump_mmp_msg(sb, mmp, __func__, __LINE__, msg) | 2253 | __dump_mmp_msg(sb, mmp, __func__, __LINE__, msg) |
2244 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ | 2254 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ |
2245 | __ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \ | 2255 | __ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \ |
2246 | fmt, ##__VA_ARGS__) | 2256 | fmt, ##__VA_ARGS__) |
2247 | 2257 | ||
2248 | #else | 2258 | #else |
2249 | 2259 | ||
2250 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ | 2260 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ |
2251 | do { \ | 2261 | do { \ |
2252 | no_printk(fmt, ##__VA_ARGS__); \ | 2262 | no_printk(fmt, ##__VA_ARGS__); \ |
2253 | __ext4_error_inode(inode, "", 0, block, " "); \ | 2263 | __ext4_error_inode(inode, "", 0, block, " "); \ |
2254 | } while (0) | 2264 | } while (0) |
2255 | #define ext4_error_file(file, func, line, block, fmt, ...) \ | 2265 | #define ext4_error_file(file, func, line, block, fmt, ...) \ |
2256 | do { \ | 2266 | do { \ |
2257 | no_printk(fmt, ##__VA_ARGS__); \ | 2267 | no_printk(fmt, ##__VA_ARGS__); \ |
2258 | __ext4_error_file(file, "", 0, block, " "); \ | 2268 | __ext4_error_file(file, "", 0, block, " "); \ |
2259 | } while (0) | 2269 | } while (0) |
2260 | #define ext4_error(sb, fmt, ...) \ | 2270 | #define ext4_error(sb, fmt, ...) \ |
2261 | do { \ | 2271 | do { \ |
2262 | no_printk(fmt, ##__VA_ARGS__); \ | 2272 | no_printk(fmt, ##__VA_ARGS__); \ |
2263 | __ext4_error(sb, "", 0, " "); \ | 2273 | __ext4_error(sb, "", 0, " "); \ |
2264 | } while (0) | 2274 | } while (0) |
2265 | #define ext4_abort(sb, fmt, ...) \ | 2275 | #define ext4_abort(sb, fmt, ...) \ |
2266 | do { \ | 2276 | do { \ |
2267 | no_printk(fmt, ##__VA_ARGS__); \ | 2277 | no_printk(fmt, ##__VA_ARGS__); \ |
2268 | __ext4_abort(sb, "", 0, " "); \ | 2278 | __ext4_abort(sb, "", 0, " "); \ |
2269 | } while (0) | 2279 | } while (0) |
2270 | #define ext4_warning(sb, fmt, ...) \ | 2280 | #define ext4_warning(sb, fmt, ...) \ |
2271 | do { \ | 2281 | do { \ |
2272 | no_printk(fmt, ##__VA_ARGS__); \ | 2282 | no_printk(fmt, ##__VA_ARGS__); \ |
2273 | __ext4_warning(sb, "", 0, " "); \ | 2283 | __ext4_warning(sb, "", 0, " "); \ |
2274 | } while (0) | 2284 | } while (0) |
2275 | #define ext4_msg(sb, level, fmt, ...) \ | 2285 | #define ext4_msg(sb, level, fmt, ...) \ |
2276 | do { \ | 2286 | do { \ |
2277 | no_printk(fmt, ##__VA_ARGS__); \ | 2287 | no_printk(fmt, ##__VA_ARGS__); \ |
2278 | __ext4_msg(sb, "", " "); \ | 2288 | __ext4_msg(sb, "", " "); \ |
2279 | } while (0) | 2289 | } while (0) |
2280 | #define dump_mmp_msg(sb, mmp, msg) \ | 2290 | #define dump_mmp_msg(sb, mmp, msg) \ |
2281 | __dump_mmp_msg(sb, mmp, "", 0, "") | 2291 | __dump_mmp_msg(sb, mmp, "", 0, "") |
2282 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ | 2292 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ |
2283 | do { \ | 2293 | do { \ |
2284 | no_printk(fmt, ##__VA_ARGS__); \ | 2294 | no_printk(fmt, ##__VA_ARGS__); \ |
2285 | __ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \ | 2295 | __ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \ |
2286 | } while (0) | 2296 | } while (0) |
2287 | 2297 | ||
2288 | #endif | 2298 | #endif |
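Both halves of this #ifdef keep gcc's printf-format checking alive: with CONFIG_PRINTK off, the format string and arguments are still fed to no_printk() in a dead branch, so mismatched specifiers warn identically in either configuration while the message text compiles away. A userspace sketch of that idiom (my_pr_debug is a hypothetical name, not the kernel macro):

    #include <stdio.h>

    /* The dead branch is removed by the optimizer, but the compiler still
     * type-checks fmt against the arguments; nothing is evaluated or
     * emitted at run time. */
    #define my_pr_debug(fmt, ...)           \
        do {                                \
            if (0)                          \
                printf(fmt, ##__VA_ARGS__); \
        } while (0)

    int main(void)
    {
        my_pr_debug("value=%d\n", 42);          /* emits no code or output */
        /* my_pr_debug("value=%d\n", "x");  -- would warn: %d vs char * */
        return 0;
    }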
2289 | 2299 | ||
2290 | extern void ext4_update_dynamic_rev(struct super_block *sb); | 2300 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
2291 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 2301 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
2292 | __u32 compat); | 2302 | __u32 compat); |
2293 | extern int ext4_update_rocompat_feature(handle_t *handle, | 2303 | extern int ext4_update_rocompat_feature(handle_t *handle, |
2294 | struct super_block *sb, __u32 rocompat); | 2304 | struct super_block *sb, __u32 rocompat); |
2295 | extern int ext4_update_incompat_feature(handle_t *handle, | 2305 | extern int ext4_update_incompat_feature(handle_t *handle, |
2296 | struct super_block *sb, __u32 incompat); | 2306 | struct super_block *sb, __u32 incompat); |
2297 | extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | 2307 | extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, |
2298 | struct ext4_group_desc *bg); | 2308 | struct ext4_group_desc *bg); |
2299 | extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, | 2309 | extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, |
2300 | struct ext4_group_desc *bg); | 2310 | struct ext4_group_desc *bg); |
2301 | extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, | 2311 | extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, |
2302 | struct ext4_group_desc *bg); | 2312 | struct ext4_group_desc *bg); |
2303 | extern __u32 ext4_free_group_clusters(struct super_block *sb, | 2313 | extern __u32 ext4_free_group_clusters(struct super_block *sb, |
2304 | struct ext4_group_desc *bg); | 2314 | struct ext4_group_desc *bg); |
2305 | extern __u32 ext4_free_inodes_count(struct super_block *sb, | 2315 | extern __u32 ext4_free_inodes_count(struct super_block *sb, |
2306 | struct ext4_group_desc *bg); | 2316 | struct ext4_group_desc *bg); |
2307 | extern __u32 ext4_used_dirs_count(struct super_block *sb, | 2317 | extern __u32 ext4_used_dirs_count(struct super_block *sb, |
2308 | struct ext4_group_desc *bg); | 2318 | struct ext4_group_desc *bg); |
2309 | extern __u32 ext4_itable_unused_count(struct super_block *sb, | 2319 | extern __u32 ext4_itable_unused_count(struct super_block *sb, |
2310 | struct ext4_group_desc *bg); | 2320 | struct ext4_group_desc *bg); |
2311 | extern void ext4_block_bitmap_set(struct super_block *sb, | 2321 | extern void ext4_block_bitmap_set(struct super_block *sb, |
2312 | struct ext4_group_desc *bg, ext4_fsblk_t blk); | 2322 | struct ext4_group_desc *bg, ext4_fsblk_t blk); |
2313 | extern void ext4_inode_bitmap_set(struct super_block *sb, | 2323 | extern void ext4_inode_bitmap_set(struct super_block *sb, |
2314 | struct ext4_group_desc *bg, ext4_fsblk_t blk); | 2324 | struct ext4_group_desc *bg, ext4_fsblk_t blk); |
2315 | extern void ext4_inode_table_set(struct super_block *sb, | 2325 | extern void ext4_inode_table_set(struct super_block *sb, |
2316 | struct ext4_group_desc *bg, ext4_fsblk_t blk); | 2326 | struct ext4_group_desc *bg, ext4_fsblk_t blk); |
2317 | extern void ext4_free_group_clusters_set(struct super_block *sb, | 2327 | extern void ext4_free_group_clusters_set(struct super_block *sb, |
2318 | struct ext4_group_desc *bg, | 2328 | struct ext4_group_desc *bg, |
2319 | __u32 count); | 2329 | __u32 count); |
2320 | extern void ext4_free_inodes_set(struct super_block *sb, | 2330 | extern void ext4_free_inodes_set(struct super_block *sb, |
2321 | struct ext4_group_desc *bg, __u32 count); | 2331 | struct ext4_group_desc *bg, __u32 count); |
2322 | extern void ext4_used_dirs_set(struct super_block *sb, | 2332 | extern void ext4_used_dirs_set(struct super_block *sb, |
2323 | struct ext4_group_desc *bg, __u32 count); | 2333 | struct ext4_group_desc *bg, __u32 count); |
2324 | extern void ext4_itable_unused_set(struct super_block *sb, | 2334 | extern void ext4_itable_unused_set(struct super_block *sb, |
2325 | struct ext4_group_desc *bg, __u32 count); | 2335 | struct ext4_group_desc *bg, __u32 count); |
2326 | extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group, | 2336 | extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group, |
2327 | struct ext4_group_desc *gdp); | 2337 | struct ext4_group_desc *gdp); |
2328 | extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, | 2338 | extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, |
2329 | struct ext4_group_desc *gdp); | 2339 | struct ext4_group_desc *gdp); |
2330 | extern int ext4_register_li_request(struct super_block *sb, | 2340 | extern int ext4_register_li_request(struct super_block *sb, |
2331 | ext4_group_t first_not_zeroed); | 2341 | ext4_group_t first_not_zeroed); |
2332 | 2342 | ||
2333 | static inline int ext4_has_group_desc_csum(struct super_block *sb) | 2343 | static inline int ext4_has_group_desc_csum(struct super_block *sb) |
2334 | { | 2344 | { |
2335 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2345 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2336 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || | 2346 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || |
2337 | (EXT4_SB(sb)->s_chksum_driver != NULL); | 2347 | (EXT4_SB(sb)->s_chksum_driver != NULL); |
2338 | } | 2348 | } |
2339 | 2349 | ||
2340 | static inline int ext4_has_metadata_csum(struct super_block *sb) | 2350 | static inline int ext4_has_metadata_csum(struct super_block *sb) |
2341 | { | 2351 | { |
2342 | WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, | 2352 | WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2343 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | 2353 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && |
2344 | !EXT4_SB(sb)->s_chksum_driver); | 2354 | !EXT4_SB(sb)->s_chksum_driver); |
2345 | 2355 | ||
2346 | return (EXT4_SB(sb)->s_chksum_driver != NULL); | 2356 | return (EXT4_SB(sb)->s_chksum_driver != NULL); |
2347 | } | 2357 | } |
2348 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) | 2358 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) |
2349 | { | 2359 | { |
2350 | return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | | 2360 | return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | |
2351 | le32_to_cpu(es->s_blocks_count_lo); | 2361 | le32_to_cpu(es->s_blocks_count_lo); |
2352 | } | 2362 | } |
2353 | 2363 | ||
2354 | static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) | 2364 | static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) |
2355 | { | 2365 | { |
2356 | return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | | 2366 | return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | |
2357 | le32_to_cpu(es->s_r_blocks_count_lo); | 2367 | le32_to_cpu(es->s_r_blocks_count_lo); |
2358 | } | 2368 | } |
2359 | 2369 | ||
2360 | static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) | 2370 | static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) |
2361 | { | 2371 | { |
2362 | return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | | 2372 | return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | |
2363 | le32_to_cpu(es->s_free_blocks_count_lo); | 2373 | le32_to_cpu(es->s_free_blocks_count_lo); |
2364 | } | 2374 | } |
2365 | 2375 | ||
2366 | static inline void ext4_blocks_count_set(struct ext4_super_block *es, | 2376 | static inline void ext4_blocks_count_set(struct ext4_super_block *es, |
2367 | ext4_fsblk_t blk) | 2377 | ext4_fsblk_t blk) |
2368 | { | 2378 | { |
2369 | es->s_blocks_count_lo = cpu_to_le32((u32)blk); | 2379 | es->s_blocks_count_lo = cpu_to_le32((u32)blk); |
2370 | es->s_blocks_count_hi = cpu_to_le32(blk >> 32); | 2380 | es->s_blocks_count_hi = cpu_to_le32(blk >> 32); |
2371 | } | 2381 | } |
2372 | 2382 | ||
2373 | static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, | 2383 | static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, |
2374 | ext4_fsblk_t blk) | 2384 | ext4_fsblk_t blk) |
2375 | { | 2385 | { |
2376 | es->s_free_blocks_count_lo = cpu_to_le32((u32)blk); | 2386 | es->s_free_blocks_count_lo = cpu_to_le32((u32)blk); |
2377 | es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); | 2387 | es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); |
2378 | } | 2388 | } |
2379 | 2389 | ||
2380 | static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, | 2390 | static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, |
2381 | ext4_fsblk_t blk) | 2391 | ext4_fsblk_t blk) |
2382 | { | 2392 | { |
2383 | es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); | 2393 | es->s_r_blocks_count_lo = cpu_to_le32((u32)blk); |
2384 | es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); | 2394 | es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); |
2385 | } | 2395 | } |
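All six helpers above implement the same lo/hi split: a block count wider than 32 bits is stored as two little-endian 32-bit superblock fields and reassembled with a shift and an OR. A self-contained round trip of that arithmetic, with plain uint32_t halves standing in for the __le32 fields (the le32_to_cpu()/cpu_to_le32() byte swaps are identity operations on little-endian hosts and are omitted):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct counts { uint32_t lo, hi; };   /* models s_blocks_count_{lo,hi} */

    static void set_blocks(struct counts *c, uint64_t blk)
    {
        c->lo = (uint32_t)blk;            /* low 32 bits */
        c->hi = (uint32_t)(blk >> 32);    /* everything above them */
    }

    static uint64_t get_blocks(const struct counts *c)
    {
        return ((uint64_t)c->hi << 32) | c->lo;
    }

    int main(void)
    {
        struct counts c;
        uint64_t n = (1ULL << 40) + 12345;     /* needs the hi half */

        set_blocks(&c, n);
        assert(get_blocks(&c) == n);
        printf("lo=%u hi=%u -> %llu\n", (unsigned)c.lo, (unsigned)c.hi,
               (unsigned long long)get_blocks(&c));
        return 0;
    }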
2386 | 2396 | ||
2387 | static inline loff_t ext4_isize(struct ext4_inode *raw_inode) | 2397 | static inline loff_t ext4_isize(struct ext4_inode *raw_inode) |
2388 | { | 2398 | { |
2389 | if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) | 2399 | if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) |
2390 | return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | | 2400 | return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | |
2391 | le32_to_cpu(raw_inode->i_size_lo); | 2401 | le32_to_cpu(raw_inode->i_size_lo); |
2392 | else | 2402 | else |
2393 | return (loff_t) le32_to_cpu(raw_inode->i_size_lo); | 2403 | return (loff_t) le32_to_cpu(raw_inode->i_size_lo); |
2394 | } | 2404 | } |
2395 | 2405 | ||
2396 | static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | 2406 | static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) |
2397 | { | 2407 | { |
2398 | raw_inode->i_size_lo = cpu_to_le32(i_size); | 2408 | raw_inode->i_size_lo = cpu_to_le32(i_size); |
2399 | raw_inode->i_size_high = cpu_to_le32(i_size >> 32); | 2409 | raw_inode->i_size_high = cpu_to_le32(i_size >> 32); |
2400 | } | 2410 | } |
2401 | 2411 | ||
2402 | static inline | 2412 | static inline |
2403 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | 2413 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, |
2404 | ext4_group_t group) | 2414 | ext4_group_t group) |
2405 | { | 2415 | { |
2406 | struct ext4_group_info ***grp_info; | 2416 | struct ext4_group_info ***grp_info; |
2407 | long indexv, indexh; | 2417 | long indexv, indexh; |
2408 | BUG_ON(group >= EXT4_SB(sb)->s_groups_count); | 2418 | BUG_ON(group >= EXT4_SB(sb)->s_groups_count); |
2409 | grp_info = EXT4_SB(sb)->s_group_info; | 2419 | grp_info = EXT4_SB(sb)->s_group_info; |
2410 | indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); | 2420 | indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); |
2411 | indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); | 2421 | indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); |
2412 | return grp_info[indexv][indexh]; | 2422 | return grp_info[indexv][indexh]; |
2413 | } | 2423 | } |
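s_group_info is a two-level table, so ext4_get_group_info() splits the group number into a top-level index (high bits) and an offset inside one descriptor-block-sized chunk (low bits); because EXT4_DESC_PER_BLOCK(sb) is a power of two, both reduce to a shift and a mask. The arithmetic, assuming 128 entries per chunk (a plausible figure for 4K blocks with 32-byte descriptors, not a universal constant):

    #include <stdio.h>

    #define DESC_PER_BLOCK_BITS 7
    #define DESC_PER_BLOCK      (1u << DESC_PER_BLOCK_BITS)   /* 128 */

    int main(void)
    {
        unsigned group  = 1000;
        unsigned indexv = group >> DESC_PER_BLOCK_BITS;    /* chunk 7    */
        unsigned indexh = group & (DESC_PER_BLOCK - 1);    /* offset 104 */

        printf("group %u -> grp_info[%u][%u]\n", group, indexv, indexh);
        return 0;
    }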
2414 | 2424 | ||
2415 | /* | 2425 | /* |
2416 | * Reading s_groups_count requires using smp_rmb() afterwards. See | 2426 | * Reading s_groups_count requires using smp_rmb() afterwards. See |
2417 | * the locking protocol documented in the comments of ext4_group_add() | 2427 | * the locking protocol documented in the comments of ext4_group_add() |
2418 | * in resize.c | 2428 | * in resize.c |
2419 | */ | 2429 | */ |
2420 | static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) | 2430 | static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) |
2421 | { | 2431 | { |
2422 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 2432 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
2423 | 2433 | ||
2424 | smp_rmb(); | 2434 | smp_rmb(); |
2425 | return ngroups; | 2435 | return ngroups; |
2426 | } | 2436 | } |
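The smp_rmb() here pairs with a write barrier on the online-resize side: the grower initializes the new group's structures first, issues smp_wmb(), and only then raises s_groups_count, so any reader that sees the larger count also sees initialized data behind it. A rough userspace analogue of that publish/consume ordering using C11 fences (names and layout here are illustrative, not resize.c's):

    #include <stdatomic.h>

    #define MAX_GROUPS 1024

    static int table[MAX_GROUPS];
    static atomic_uint ngroups;

    /* Grower: make the slot visible before the count (cf. smp_wmb()). */
    static void add_group(unsigned n, int data)
    {
        table[n] = data;
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(&ngroups, n + 1, memory_order_relaxed);
    }

    /* Reader: load the count, then fence before touching the slots
     * (cf. the smp_rmb() in ext4_get_groups_count()). */
    static long sum_groups(void)
    {
        unsigned n = atomic_load_explicit(&ngroups, memory_order_relaxed);
        long s = 0;

        atomic_thread_fence(memory_order_acquire);
        for (unsigned i = 0; i < n; i++)
            s += table[i];
        return s;
    }

    int main(void)
    {
        add_group(0, 7);
        return sum_groups() == 7 ? 0 : 1;
    }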
2427 | 2437 | ||
2428 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, | 2438 | static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, |
2429 | ext4_group_t block_group) | 2439 | ext4_group_t block_group) |
2430 | { | 2440 | { |
2431 | return block_group >> sbi->s_log_groups_per_flex; | 2441 | return block_group >> sbi->s_log_groups_per_flex; |
2432 | } | 2442 | } |
2433 | 2443 | ||
2434 | static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) | 2444 | static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) |
2435 | { | 2445 | { |
2436 | return 1 << sbi->s_log_groups_per_flex; | 2446 | return 1 << sbi->s_log_groups_per_flex; |
2437 | } | 2447 | } |
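A flex group is simply a run of 2^s_log_groups_per_flex ordinary block groups whose bitmaps and inode tables are packed together, so mapping a block group to its flex group is one right shift. Worked through with a log of 4, mkfs.ext4's usual default (assumed here, not read from any superblock):

    #include <stdio.h>

    int main(void)
    {
        unsigned log_groups_per_flex = 4;                /* assumed default */
        unsigned flex_size = 1u << log_groups_per_flex;  /* 16 groups       */

        for (unsigned bg = 0; bg < 40; bg += 17)
            printf("block group %2u -> flex group %u (size %u)\n",
                   bg, bg >> log_groups_per_flex, flex_size);
        return 0;
    }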
2438 | 2448 | ||
2439 | #define ext4_std_error(sb, errno) \ | 2449 | #define ext4_std_error(sb, errno) \ |
2440 | do { \ | 2450 | do { \ |
2441 | if ((errno)) \ | 2451 | if ((errno)) \ |
2442 | __ext4_std_error((sb), __func__, __LINE__, (errno)); \ | 2452 | __ext4_std_error((sb), __func__, __LINE__, (errno)); \ |
2443 | } while (0) | 2453 | } while (0) |
2444 | 2454 | ||
2445 | #ifdef CONFIG_SMP | 2455 | #ifdef CONFIG_SMP |
2446 | /* Each CPU can accumulate up to percpu_counter_batch clusters in its | 2456 | /* Each CPU can accumulate up to percpu_counter_batch clusters in its |
2447 | * local counter, so the global count can be stale by as much as | 2457 | * local counter, so the global count can be stale by as much as |
2448 | * percpu_counter_batch * nr_cpu_ids; add a 4x window on top of that. | 2458 | * percpu_counter_batch * nr_cpu_ids; add a 4x window on top of that. |
2449 | */ | 2459 | */ |
2450 | #define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) | 2460 | #define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) |
2451 | #else | 2461 | #else |
2452 | #define EXT4_FREECLUSTERS_WATERMARK 0 | 2462 | #define EXT4_FREECLUSTERS_WATERMARK 0 |
2453 | #endif | 2463 | #endif |
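Concretely, if percpu_counter_batch were 32 (an assumed figure; the kernel derives it from the CPU count) on an 8-CPU box, the watermark comes to 4 * 32 * 8 = 1024 clusters; roughly speaking, once free space falls under it, the allocator stops trusting the cheap approximate counter and pays for an exact sum:

    #include <stdio.h>

    int main(void)
    {
        long percpu_counter_batch = 32;   /* assumed batch size */
        long nr_cpu_ids = 8;

        printf("watermark = %ld clusters\n",
               4 * (percpu_counter_batch * nr_cpu_ids));   /* 1024 */
        return 0;
    }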
2454 | 2464 | ||
2455 | /* Update i_disksize. Requires i_mutex to avoid races with truncate */ | 2465 | /* Update i_disksize. Requires i_mutex to avoid races with truncate */ |
2456 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | 2466 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) |
2457 | { | 2467 | { |
2458 | WARN_ON_ONCE(S_ISREG(inode->i_mode) && | 2468 | WARN_ON_ONCE(S_ISREG(inode->i_mode) && |
2459 | !mutex_is_locked(&inode->i_mutex)); | 2469 | !mutex_is_locked(&inode->i_mutex)); |
2460 | down_write(&EXT4_I(inode)->i_data_sem); | 2470 | down_write(&EXT4_I(inode)->i_data_sem); |
2461 | if (newsize > EXT4_I(inode)->i_disksize) | 2471 | if (newsize > EXT4_I(inode)->i_disksize) |
2462 | EXT4_I(inode)->i_disksize = newsize; | 2472 | EXT4_I(inode)->i_disksize = newsize; |
2463 | up_write(&EXT4_I(inode)->i_data_sem); | 2473 | up_write(&EXT4_I(inode)->i_data_sem); |
2464 | } | 2474 | } |
2465 | 2475 | ||
2466 | /* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ | 2476 | /* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ |
2467 | static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) | 2477 | static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) |
2468 | { | 2478 | { |
2469 | int changed = 0; | 2479 | int changed = 0; |
2470 | 2480 | ||
2471 | if (newsize > inode->i_size) { | 2481 | if (newsize > inode->i_size) { |
2472 | i_size_write(inode, newsize); | 2482 | i_size_write(inode, newsize); |
2473 | changed = 1; | 2483 | changed = 1; |
2474 | } | 2484 | } |
2475 | if (newsize > EXT4_I(inode)->i_disksize) { | 2485 | if (newsize > EXT4_I(inode)->i_disksize) { |
2476 | ext4_update_i_disksize(inode, newsize); | 2486 | ext4_update_i_disksize(inode, newsize); |
2477 | changed |= 2; | 2487 | changed |= 2; |
2478 | } | 2488 | } |
2479 | return changed; | 2489 | return changed; |
2480 | } | 2490 | } |
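ext4_update_inode_size() advances two watermarks at once and reports which moved as a small bitmask (bit 0: in-core i_size grew, bit 1: on-disk i_disksize grew), letting a caller skip dirtying the inode when nothing changed. A self-contained model of that monotonic update:

    #include <assert.h>

    struct sizes { long long i_size, i_disksize; };

    static int update_sizes(struct sizes *s, long long newsize)
    {
        int changed = 0;

        if (newsize > s->i_size)     { s->i_size = newsize;     changed |= 1; }
        if (newsize > s->i_disksize) { s->i_disksize = newsize; changed |= 2; }
        return changed;
    }

    int main(void)
    {
        struct sizes s = { .i_size = 100, .i_disksize = 50 };

        assert(update_sizes(&s, 80)  == 2);  /* only the on-disk size grew  */
        assert(update_sizes(&s, 200) == 3);  /* both grew                   */
        assert(update_sizes(&s, 10)  == 0);  /* shrinking is truncate's job */
        return 0;
    }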
2481 | 2491 | ||
2482 | struct ext4_group_info { | 2492 | struct ext4_group_info { |
2483 | unsigned long bb_state; | 2493 | unsigned long bb_state; |
2484 | struct rb_root bb_free_root; | 2494 | struct rb_root bb_free_root; |
2485 | ext4_grpblk_t bb_first_free; /* first free block */ | 2495 | ext4_grpblk_t bb_first_free; /* first free block */ |
2486 | ext4_grpblk_t bb_free; /* total free blocks */ | 2496 | ext4_grpblk_t bb_free; /* total free blocks */ |
2487 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ | 2497 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
2488 | ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ | 2498 | ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ |
2489 | struct list_head bb_prealloc_list; | 2499 | struct list_head bb_prealloc_list; |
2490 | #ifdef DOUBLE_CHECK | 2500 | #ifdef DOUBLE_CHECK |
2491 | void *bb_bitmap; | 2501 | void *bb_bitmap; |
2492 | #endif | 2502 | #endif |
2493 | struct rw_semaphore alloc_sem; | 2503 | struct rw_semaphore alloc_sem; |
2494 | ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block | 2504 | ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block |
2495 | * regions, index is order. | 2505 | * regions, index is order. |
2496 | * bb_counters[3] = 5 means | 2506 | * bb_counters[3] = 5 means |
2497 | * 5 free 8-block regions. */ | 2507 | * 5 free 8-block regions. */ |
2498 | }; | 2508 | }; |
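bb_counters[] is the buddy summary for the group: entry k counts free extents of exactly 2^k blocks, so summing counters[k] * 2^k must reproduce bb_free. A quick check of that identity over a made-up set of counters:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical buddy counters for one group, orders 0..4. */
        unsigned counters[] = { 3, 0, 2, 5, 1 };
        unsigned free_blocks = 0;

        for (unsigned k = 0; k < sizeof(counters) / sizeof(counters[0]); k++)
            free_blocks += counters[k] << k;     /* counters[k] * 2^k */

        printf("bb_free = %u\n", free_blocks);   /* 3 + 8 + 40 + 16 = 67 */
        return 0;
    }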
2499 | 2509 | ||
2500 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | 2510 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 |
2501 | #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 | 2511 | #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1 |
2502 | #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2 | 2512 | #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2 |
2503 | #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3 | 2513 | #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3 |
2504 | 2514 | ||
2505 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | 2515 | #define EXT4_MB_GRP_NEED_INIT(grp) \ |
2506 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | 2516 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) |
2507 | #define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \ | 2517 | #define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \ |
2508 | (test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state))) | 2518 | (test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state))) |
2509 | #define EXT4_MB_GRP_IBITMAP_CORRUPT(grp) \ | 2519 | #define EXT4_MB_GRP_IBITMAP_CORRUPT(grp) \ |
2510 | (test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state))) | 2520 | (test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state))) |
2511 | 2521 | ||
2512 | #define EXT4_MB_GRP_WAS_TRIMMED(grp) \ | 2522 | #define EXT4_MB_GRP_WAS_TRIMMED(grp) \ |
2513 | (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | 2523 | (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) |
2514 | #define EXT4_MB_GRP_SET_TRIMMED(grp) \ | 2524 | #define EXT4_MB_GRP_SET_TRIMMED(grp) \ |
2515 | (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | 2525 | (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) |
2516 | #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ | 2526 | #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ |
2517 | (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) | 2527 | (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) |
2518 | 2528 | ||
2519 | #define EXT4_MAX_CONTENTION 8 | 2529 | #define EXT4_MAX_CONTENTION 8 |
2520 | #define EXT4_CONTENTION_THRESHOLD 2 | 2530 | #define EXT4_CONTENTION_THRESHOLD 2 |
2521 | 2531 | ||
2522 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, | 2532 | static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, |
2523 | ext4_group_t group) | 2533 | ext4_group_t group) |
2524 | { | 2534 | { |
2525 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); | 2535 | return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); |
2526 | } | 2536 | } |
2527 | 2537 | ||
2528 | /* | 2538 | /* |
2529 | * Returns true if the filesystem is busy enough that attempts to | 2539 | * Returns true if the filesystem is busy enough that attempts to |
2530 | * access the block group locks have run into contention. | 2540 | * access the block group locks have run into contention. |
2531 | */ | 2541 | */ |
2532 | static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) | 2542 | static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) |
2533 | { | 2543 | { |
2534 | return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); | 2544 | return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); |
2535 | } | 2545 | } |
2536 | 2546 | ||
2537 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 2547 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
2538 | { | 2548 | { |
2539 | spinlock_t *lock = ext4_group_lock_ptr(sb, group); | 2549 | spinlock_t *lock = ext4_group_lock_ptr(sb, group); |
2540 | if (spin_trylock(lock)) | 2550 | if (spin_trylock(lock)) |
2541 | /* | 2551 | /* |
2542 | * We're able to grab the lock right away, so drop the | 2552 | * We're able to grab the lock right away, so drop the |
2543 | * lock contention counter. | 2553 | * lock contention counter. |
2544 | */ | 2554 | */ |
2545 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); | 2555 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); |
2546 | else { | 2556 | else { |
2547 | /* | 2557 | /* |
2548 | * The lock is busy, so bump the contention counter, | 2558 | * The lock is busy, so bump the contention counter, |
2549 | * and then wait on the spin lock. | 2559 | * and then wait on the spin lock. |
2550 | */ | 2560 | */ |
2551 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, | 2561 | atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, |
2552 | EXT4_MAX_CONTENTION); | 2562 | EXT4_MAX_CONTENTION); |
2553 | spin_lock(lock); | 2563 | spin_lock(lock); |
2554 | } | 2564 | } |
2555 | } | 2565 | } |
2556 | 2566 | ||
2557 | static inline void ext4_unlock_group(struct super_block *sb, | 2567 | static inline void ext4_unlock_group(struct super_block *sb, |
2558 | ext4_group_t group) | 2568 | ext4_group_t group) |
2559 | { | 2569 | { |
2560 | spin_unlock(ext4_group_lock_ptr(sb, group)); | 2570 | spin_unlock(ext4_group_lock_ptr(sb, group)); |
2561 | } | 2571 | } |
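ext4_lock_group() keeps a saturating contention score per filesystem: a trylock that succeeds decays s_lock_busy toward zero, one that fails bumps it toward EXT4_MAX_CONTENTION before blocking, and ext4_fs_is_busy() compares the score against the threshold so hot paths can back off from lock-heavy work. A userspace sketch of the same adaptive scheme, with a pthread mutex standing in for the per-group spinlock:

    #include <pthread.h>
    #include <stdatomic.h>

    #define MAX_CONTENTION       8
    #define CONTENTION_THRESHOLD 2

    static atomic_int lock_busy;   /* cf. sbi->s_lock_busy */

    static int fs_is_busy(void)
    {
        return atomic_load(&lock_busy) > CONTENTION_THRESHOLD;
    }

    static void adaptive_lock(pthread_mutex_t *lock)
    {
        int v = atomic_load(&lock_busy);

        if (pthread_mutex_trylock(lock) == 0) {
            /* Uncontended: decay the score, never below zero
             * (cf. atomic_add_unless(..., -1, 0)). */
            while (v > 0 &&
                   !atomic_compare_exchange_weak(&lock_busy, &v, v - 1))
                ;
            return;
        }
        /* Contended: bump the score, saturating at MAX_CONTENTION,
         * then wait for the lock. */
        while (v < MAX_CONTENTION &&
               !atomic_compare_exchange_weak(&lock_busy, &v, v + 1))
            ;
        pthread_mutex_lock(lock);
    }

    int main(void)
    {
        pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

        adaptive_lock(&m);
        pthread_mutex_unlock(&m);
        return fs_is_busy();   /* 0: no contention was recorded */
    }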
2562 | 2572 | ||
2563 | /* | 2573 | /* |
2564 | * Block validity checking | 2574 | * Block validity checking |
2565 | */ | 2575 | */ |
2566 | #define ext4_check_indirect_blockref(inode, bh) \ | 2576 | #define ext4_check_indirect_blockref(inode, bh) \ |
2567 | ext4_check_blockref(__func__, __LINE__, inode, \ | 2577 | ext4_check_blockref(__func__, __LINE__, inode, \ |
2568 | (__le32 *)(bh)->b_data, \ | 2578 | (__le32 *)(bh)->b_data, \ |
2569 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | 2579 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) |
2570 | 2580 | ||
2571 | #define ext4_ind_check_inode(inode) \ | 2581 | #define ext4_ind_check_inode(inode) \ |
2572 | ext4_check_blockref(__func__, __LINE__, inode, \ | 2582 | ext4_check_blockref(__func__, __LINE__, inode, \ |
2573 | EXT4_I(inode)->i_data, \ | 2583 | EXT4_I(inode)->i_data, \ |
2574 | EXT4_NDIR_BLOCKS) | 2584 | EXT4_NDIR_BLOCKS) |
2575 | 2585 | ||
2576 | /* | 2586 | /* |
2577 | * Inodes and files operations | 2587 | * Inodes and files operations |
2578 | */ | 2588 | */ |
2579 | 2589 | ||
2580 | /* dir.c */ | 2590 | /* dir.c */ |
2581 | extern const struct file_operations ext4_dir_operations; | 2591 | extern const struct file_operations ext4_dir_operations; |
2582 | 2592 | ||
2583 | /* file.c */ | 2593 | /* file.c */ |
2584 | extern const struct inode_operations ext4_file_inode_operations; | 2594 | extern const struct inode_operations ext4_file_inode_operations; |
2585 | extern const struct file_operations ext4_file_operations; | 2595 | extern const struct file_operations ext4_file_operations; |
2586 | extern const struct file_operations ext4_dax_file_operations; | 2596 | extern const struct file_operations ext4_dax_file_operations; |
2587 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); | 2597 | extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); |
2588 | 2598 | ||
2589 | /* inline.c */ | 2599 | /* inline.c */ |
2590 | extern int ext4_get_max_inline_size(struct inode *inode); | 2600 | extern int ext4_get_max_inline_size(struct inode *inode); |
2591 | extern int ext4_find_inline_data_nolock(struct inode *inode); | 2601 | extern int ext4_find_inline_data_nolock(struct inode *inode); |
2592 | extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, | 2602 | extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, |
2593 | unsigned int len); | 2603 | unsigned int len); |
2594 | extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); | 2604 | extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); |
2595 | 2605 | ||
2596 | extern int ext4_readpage_inline(struct inode *inode, struct page *page); | 2606 | extern int ext4_readpage_inline(struct inode *inode, struct page *page); |
2597 | extern int ext4_try_to_write_inline_data(struct address_space *mapping, | 2607 | extern int ext4_try_to_write_inline_data(struct address_space *mapping, |
2598 | struct inode *inode, | 2608 | struct inode *inode, |
2599 | loff_t pos, unsigned len, | 2609 | loff_t pos, unsigned len, |
2600 | unsigned flags, | 2610 | unsigned flags, |
2601 | struct page **pagep); | 2611 | struct page **pagep); |
2602 | extern int ext4_write_inline_data_end(struct inode *inode, | 2612 | extern int ext4_write_inline_data_end(struct inode *inode, |
2603 | loff_t pos, unsigned len, | 2613 | loff_t pos, unsigned len, |
2604 | unsigned copied, | 2614 | unsigned copied, |
2605 | struct page *page); | 2615 | struct page *page); |
2606 | extern struct buffer_head * | 2616 | extern struct buffer_head * |
2607 | ext4_journalled_write_inline_data(struct inode *inode, | 2617 | ext4_journalled_write_inline_data(struct inode *inode, |
2608 | unsigned len, | 2618 | unsigned len, |
2609 | struct page *page); | 2619 | struct page *page); |
2610 | extern int ext4_da_write_inline_data_begin(struct address_space *mapping, | 2620 | extern int ext4_da_write_inline_data_begin(struct address_space *mapping, |
2611 | struct inode *inode, | 2621 | struct inode *inode, |
2612 | loff_t pos, unsigned len, | 2622 | loff_t pos, unsigned len, |
2613 | unsigned flags, | 2623 | unsigned flags, |
2614 | struct page **pagep, | 2624 | struct page **pagep, |
2615 | void **fsdata); | 2625 | void **fsdata); |
2616 | extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, | 2626 | extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, |
2617 | unsigned len, unsigned copied, | 2627 | unsigned len, unsigned copied, |
2618 | struct page *page); | 2628 | struct page *page); |
2619 | extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, | 2629 | extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry, |
2620 | struct inode *inode); | 2630 | struct inode *inode); |
2621 | extern int ext4_try_create_inline_dir(handle_t *handle, | 2631 | extern int ext4_try_create_inline_dir(handle_t *handle, |
2622 | struct inode *parent, | 2632 | struct inode *parent, |
2623 | struct inode *inode); | 2633 | struct inode *inode); |
2624 | extern int ext4_read_inline_dir(struct file *filp, | 2634 | extern int ext4_read_inline_dir(struct file *filp, |
2625 | struct dir_context *ctx, | 2635 | struct dir_context *ctx, |
2626 | int *has_inline_data); | 2636 | int *has_inline_data); |
2627 | extern int htree_inlinedir_to_tree(struct file *dir_file, | 2637 | extern int htree_inlinedir_to_tree(struct file *dir_file, |
2628 | struct inode *dir, ext4_lblk_t block, | 2638 | struct inode *dir, ext4_lblk_t block, |
2629 | struct dx_hash_info *hinfo, | 2639 | struct dx_hash_info *hinfo, |
2630 | __u32 start_hash, __u32 start_minor_hash, | 2640 | __u32 start_hash, __u32 start_minor_hash, |
2631 | int *has_inline_data); | 2641 | int *has_inline_data); |
2632 | extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, | 2642 | extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, |
2633 | const struct qstr *d_name, | 2643 | const struct qstr *d_name, |
2634 | struct ext4_dir_entry_2 **res_dir, | 2644 | struct ext4_dir_entry_2 **res_dir, |
2635 | int *has_inline_data); | 2645 | int *has_inline_data); |
2636 | extern int ext4_delete_inline_entry(handle_t *handle, | 2646 | extern int ext4_delete_inline_entry(handle_t *handle, |
2637 | struct inode *dir, | 2647 | struct inode *dir, |
2638 | struct ext4_dir_entry_2 *de_del, | 2648 | struct ext4_dir_entry_2 *de_del, |
2639 | struct buffer_head *bh, | 2649 | struct buffer_head *bh, |
2640 | int *has_inline_data); | 2650 | int *has_inline_data); |
2641 | extern int empty_inline_dir(struct inode *dir, int *has_inline_data); | 2651 | extern int empty_inline_dir(struct inode *dir, int *has_inline_data); |
2642 | extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, | 2652 | extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, |
2643 | struct ext4_dir_entry_2 **parent_de, | 2653 | struct ext4_dir_entry_2 **parent_de, |
2644 | int *retval); | 2654 | int *retval); |
2645 | extern int ext4_inline_data_fiemap(struct inode *inode, | 2655 | extern int ext4_inline_data_fiemap(struct inode *inode, |
2646 | struct fiemap_extent_info *fieinfo, | 2656 | struct fiemap_extent_info *fieinfo, |
2647 | int *has_inline, __u64 start, __u64 len); | 2657 | int *has_inline, __u64 start, __u64 len); |
2648 | extern int ext4_try_to_evict_inline_data(handle_t *handle, | 2658 | extern int ext4_try_to_evict_inline_data(handle_t *handle, |
2649 | struct inode *inode, | 2659 | struct inode *inode, |
2650 | int needed); | 2660 | int needed); |
2651 | extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline); | 2661 | extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline); |
2652 | 2662 | ||
2653 | extern int ext4_convert_inline_data(struct inode *inode); | 2663 | extern int ext4_convert_inline_data(struct inode *inode); |
2654 | 2664 | ||
2655 | static inline int ext4_has_inline_data(struct inode *inode) | 2665 | static inline int ext4_has_inline_data(struct inode *inode) |
2656 | { | 2666 | { |
2657 | return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) && | 2667 | return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) && |
2658 | EXT4_I(inode)->i_inline_off; | 2668 | EXT4_I(inode)->i_inline_off; |
2659 | } | 2669 | } |
2660 | 2670 | ||
2661 | /* namei.c */ | 2671 | /* namei.c */ |
2662 | extern const struct inode_operations ext4_dir_inode_operations; | 2672 | extern const struct inode_operations ext4_dir_inode_operations; |
2663 | extern const struct inode_operations ext4_special_inode_operations; | 2673 | extern const struct inode_operations ext4_special_inode_operations; |
2664 | extern struct dentry *ext4_get_parent(struct dentry *child); | 2674 | extern struct dentry *ext4_get_parent(struct dentry *child); |
2665 | extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, | 2675 | extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, |
2666 | struct ext4_dir_entry_2 *de, | 2676 | struct ext4_dir_entry_2 *de, |
2667 | int blocksize, int csum_size, | 2677 | int blocksize, int csum_size, |
2668 | unsigned int parent_ino, int dotdot_real_len); | 2678 | unsigned int parent_ino, int dotdot_real_len); |
2669 | extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t, | 2679 | extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t, |
2670 | unsigned int blocksize); | 2680 | unsigned int blocksize); |
2671 | extern int ext4_handle_dirty_dirent_node(handle_t *handle, | 2681 | extern int ext4_handle_dirty_dirent_node(handle_t *handle, |
2672 | struct inode *inode, | 2682 | struct inode *inode, |
2673 | struct buffer_head *bh); | 2683 | struct buffer_head *bh); |
2674 | #define S_SHIFT 12 | 2684 | #define S_SHIFT 12 |
2675 | static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { | 2685 | static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { |
2676 | [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, | 2686 | [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, |
2677 | [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, | 2687 | [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, |
2678 | [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, | 2688 | [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, |
2679 | [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV, | 2689 | [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV, |
2680 | [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO, | 2690 | [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO, |
2681 | [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK, | 2691 | [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK, |
2682 | [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK, | 2692 | [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK, |
2683 | }; | 2693 | }; |
2684 | 2694 | ||
2685 | static inline void ext4_set_de_type(struct super_block *sb, | 2695 | static inline void ext4_set_de_type(struct super_block *sb, |
2686 | struct ext4_dir_entry_2 *de, | 2696 | struct ext4_dir_entry_2 *de, |
2687 | umode_t mode) { | 2697 | umode_t mode) { |
2688 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) | 2698 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) |
2689 | de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | 2699 | de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; |
2690 | } | 2700 | } |
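The table above turns the file-type bits of i_mode into the on-disk dirent type with a single array index: (mode & S_IFMT) >> S_SHIFT yields a small slot number. A minimal standalone sketch of the same mapping, with the classic ext2/ext4 on-disk type codes written out as literals and the FILETYPE feature check elided:

#include <assert.h>
#include <sys/stat.h>

/* Same idea as ext4_type_by_mode above: index by the mode-type bits. */
static const unsigned char type_by_mode[S_IFMT >> 12] = {
	[S_IFREG  >> 12] = 1,	/* EXT4_FT_REG_FILE */
	[S_IFDIR  >> 12] = 2,	/* EXT4_FT_DIR      */
	[S_IFCHR  >> 12] = 3,	/* EXT4_FT_CHRDEV   */
	[S_IFBLK  >> 12] = 4,	/* EXT4_FT_BLKDEV   */
	[S_IFIFO  >> 12] = 5,	/* EXT4_FT_FIFO     */
	[S_IFSOCK >> 12] = 6,	/* EXT4_FT_SOCK     */
	[S_IFLNK  >> 12] = 7,	/* EXT4_FT_SYMLINK  */
};

int main(void)
{
	/* S_IFDIR is 0040000, so (S_IFDIR & S_IFMT) >> 12 == 4 -> FT_DIR */
	assert(type_by_mode[(S_IFDIR & S_IFMT) >> 12] == 2);
	return 0;
}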
2691 | 2701 | ||
2692 | 2702 | ||
2693 | /* symlink.c */ | 2703 | /* symlink.c */ |
2694 | extern const struct inode_operations ext4_symlink_inode_operations; | 2704 | extern const struct inode_operations ext4_symlink_inode_operations; |
2695 | extern const struct inode_operations ext4_fast_symlink_inode_operations; | 2705 | extern const struct inode_operations ext4_fast_symlink_inode_operations; |
2696 | 2706 | ||
2697 | /* block_validity */ | 2707 | /* block_validity */ |
2698 | extern void ext4_release_system_zone(struct super_block *sb); | 2708 | extern void ext4_release_system_zone(struct super_block *sb); |
2699 | extern int ext4_setup_system_zone(struct super_block *sb); | 2709 | extern int ext4_setup_system_zone(struct super_block *sb); |
2700 | extern int __init ext4_init_system_zone(void); | 2710 | extern int __init ext4_init_system_zone(void); |
2701 | extern void ext4_exit_system_zone(void); | 2711 | extern void ext4_exit_system_zone(void); |
2702 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, | 2712 | extern int ext4_data_block_valid(struct ext4_sb_info *sbi, |
2703 | ext4_fsblk_t start_blk, | 2713 | ext4_fsblk_t start_blk, |
2704 | unsigned int count); | 2714 | unsigned int count); |
2705 | extern int ext4_check_blockref(const char *, unsigned int, | 2715 | extern int ext4_check_blockref(const char *, unsigned int, |
2706 | struct inode *, __le32 *, unsigned int); | 2716 | struct inode *, __le32 *, unsigned int); |
2707 | 2717 | ||
2708 | /* extents.c */ | 2718 | /* extents.c */ |
2709 | struct ext4_ext_path; | 2719 | struct ext4_ext_path; |
2710 | struct ext4_extent; | 2720 | struct ext4_extent; |
2711 | 2721 | ||
2712 | /* | 2722 | /* |
2713 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is | 2723 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is |
2714 | * __le32. | 2724 | * __le32. |
2715 | */ | 2725 | */ |
2716 | #define EXT_MAX_BLOCKS 0xffffffff | 2726 | #define EXT_MAX_BLOCKS 0xffffffff |
2717 | 2727 | ||
2718 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 2728 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
2719 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 2729 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
2720 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); | 2730 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); |
2721 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | 2731 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
2722 | struct ext4_map_blocks *map, int flags); | 2732 | struct ext4_map_blocks *map, int flags); |
2723 | extern void ext4_ext_truncate(handle_t *, struct inode *); | 2733 | extern void ext4_ext_truncate(handle_t *, struct inode *); |
2724 | extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | 2734 | extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2725 | ext4_lblk_t end); | 2735 | ext4_lblk_t end); |
2726 | extern void ext4_ext_init(struct super_block *); | 2736 | extern void ext4_ext_init(struct super_block *); |
2727 | extern void ext4_ext_release(struct super_block *); | 2737 | extern void ext4_ext_release(struct super_block *); |
2728 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, | 2738 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
2729 | loff_t len); | 2739 | loff_t len); |
2730 | extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, | 2740 | extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
2731 | loff_t offset, ssize_t len); | 2741 | loff_t offset, ssize_t len); |
2732 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | 2742 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, |
2733 | struct ext4_map_blocks *map, int flags); | 2743 | struct ext4_map_blocks *map, int flags); |
2734 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 2744 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
2735 | ext4_lblk_t lblocks); | 2745 | ext4_lblk_t lblocks); |
2736 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 2746 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
2737 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, | 2747 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
2738 | int num, | 2748 | int num, |
2739 | struct ext4_ext_path *path); | 2749 | struct ext4_ext_path *path); |
2740 | extern int ext4_can_extents_be_merged(struct inode *inode, | 2750 | extern int ext4_can_extents_be_merged(struct inode *inode, |
2741 | struct ext4_extent *ex1, | 2751 | struct ext4_extent *ex1, |
2742 | struct ext4_extent *ex2); | 2752 | struct ext4_extent *ex2); |
2743 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, | 2753 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, |
2744 | struct ext4_ext_path **, | 2754 | struct ext4_ext_path **, |
2745 | struct ext4_extent *, int); | 2755 | struct ext4_extent *, int); |
2746 | extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, | 2756 | extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, |
2747 | struct ext4_ext_path **, | 2757 | struct ext4_ext_path **, |
2748 | int flags); | 2758 | int flags); |
2749 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); | 2759 | extern void ext4_ext_drop_refs(struct ext4_ext_path *); |
2750 | extern int ext4_ext_check_inode(struct inode *inode); | 2760 | extern int ext4_ext_check_inode(struct inode *inode); |
2751 | extern int ext4_find_delalloc_range(struct inode *inode, | 2761 | extern int ext4_find_delalloc_range(struct inode *inode, |
2752 | ext4_lblk_t lblk_start, | 2762 | ext4_lblk_t lblk_start, |
2753 | ext4_lblk_t lblk_end); | 2763 | ext4_lblk_t lblk_end); |
2754 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | 2764 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); |
2755 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); | 2765 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); |
2756 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2766 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2757 | __u64 start, __u64 len); | 2767 | __u64 start, __u64 len); |
2758 | extern int ext4_ext_precache(struct inode *inode); | 2768 | extern int ext4_ext_precache(struct inode *inode); |
2759 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | 2769 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); |
2760 | extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, | 2770 | extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, |
2761 | struct inode *inode2, ext4_lblk_t lblk1, | 2771 | struct inode *inode2, ext4_lblk_t lblk1, |
2762 | ext4_lblk_t lblk2, ext4_lblk_t count, | 2772 | ext4_lblk_t lblk2, ext4_lblk_t count, |
2763 | int mark_unwritten, int *err); | 2773 | int mark_unwritten, int *err); |
2764 | 2774 | ||
2765 | /* move_extent.c */ | 2775 | /* move_extent.c */ |
2766 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2776 | extern void ext4_double_down_write_data_sem(struct inode *first, |
2767 | struct inode *second); | 2777 | struct inode *second); |
2768 | extern void ext4_double_up_write_data_sem(struct inode *orig_inode, | 2778 | extern void ext4_double_up_write_data_sem(struct inode *orig_inode, |
2769 | struct inode *donor_inode); | 2779 | struct inode *donor_inode); |
2770 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | 2780 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, |
2771 | __u64 start_orig, __u64 start_donor, | 2781 | __u64 start_orig, __u64 start_donor, |
2772 | __u64 len, __u64 *moved_len); | 2782 | __u64 len, __u64 *moved_len); |
2773 | 2783 | ||
2774 | /* page-io.c */ | 2784 | /* page-io.c */ |
2775 | extern int __init ext4_init_pageio(void); | 2785 | extern int __init ext4_init_pageio(void); |
2776 | extern void ext4_exit_pageio(void); | 2786 | extern void ext4_exit_pageio(void); |
2777 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2787 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2778 | extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); | 2788 | extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); |
2779 | extern int ext4_put_io_end(ext4_io_end_t *io_end); | 2789 | extern int ext4_put_io_end(ext4_io_end_t *io_end); |
2780 | extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); | 2790 | extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); |
2781 | extern void ext4_io_submit_init(struct ext4_io_submit *io, | 2791 | extern void ext4_io_submit_init(struct ext4_io_submit *io, |
2782 | struct writeback_control *wbc); | 2792 | struct writeback_control *wbc); |
2783 | extern void ext4_end_io_rsv_work(struct work_struct *work); | 2793 | extern void ext4_end_io_rsv_work(struct work_struct *work); |
2784 | extern void ext4_io_submit(struct ext4_io_submit *io); | 2794 | extern void ext4_io_submit(struct ext4_io_submit *io); |
2785 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | 2795 | extern int ext4_bio_write_page(struct ext4_io_submit *io, |
2786 | struct page *page, | 2796 | struct page *page, |
2787 | int len, | 2797 | int len, |
2788 | struct writeback_control *wbc, | 2798 | struct writeback_control *wbc, |
2789 | bool keep_towrite); | 2799 | bool keep_towrite); |
2790 | 2800 | ||
2791 | /* mmp.c */ | 2801 | /* mmp.c */ |
2792 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | 2802 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); |
2793 | 2803 | ||
2794 | /* | 2804 | /* |
2795 | * Test whether block and inode bitmaps are properly initialized. With | 2805 | * Test whether block and inode bitmaps are properly initialized. With |
2796 | * uninit_bg, reading the block from disk is not enough to mark the | 2806 | * uninit_bg, reading the block from disk is not enough to mark the |
2797 | * bitmap uptodate; we also need to zero out the bitmap. | 2807 | * bitmap uptodate; we also need to zero out the bitmap. |
2798 | */ | 2808 | */ |
2799 | #define BH_BITMAP_UPTODATE BH_JBDPrivateStart | 2809 | #define BH_BITMAP_UPTODATE BH_JBDPrivateStart |
2800 | 2810 | ||
2801 | static inline int bitmap_uptodate(struct buffer_head *bh) | 2811 | static inline int bitmap_uptodate(struct buffer_head *bh) |
2802 | { | 2812 | { |
2803 | return (buffer_uptodate(bh) && | 2813 | return (buffer_uptodate(bh) && |
2804 | test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state)); | 2814 | test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state)); |
2805 | } | 2815 | } |
2806 | static inline void set_bitmap_uptodate(struct buffer_head *bh) | 2816 | static inline void set_bitmap_uptodate(struct buffer_head *bh) |
2807 | { | 2817 | { |
2808 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); | 2818 | set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); |
2809 | } | 2819 | } |
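A hedged sketch of the intended read-side pattern; read_group_bitmap() is an invented name, and the bare memset stands in for the real initialization, which also marks the group's own metadata blocks in use and runs under the group lock:

#include <linux/buffer_head.h>
#include <linux/string.h>

/* Sketch only: with uninit_bg the on-disk bitmap block may never have
 * been written, so buffer_uptodate() alone cannot be trusted. */
static struct buffer_head *read_group_bitmap(struct super_block *sb,
					     sector_t blk)
{
	struct buffer_head *bh = sb_bread(sb, blk);

	if (bh && !bitmap_uptodate(bh)) {
		memset(bh->b_data, 0, bh->b_size);	/* build in memory */
		set_bitmap_uptodate(bh);
	}
	return bh;
}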
2810 | 2820 | ||
2811 | /* | 2821 | /* |
2812 | * Disable the DIO read nolock optimization, so new DIO readers will be | 2822 | * Disable the DIO read nolock optimization, so new DIO readers will be |
2813 | * forced to grab i_mutex. | 2823 | * forced to grab i_mutex. |
2814 | */ | 2824 | */ |
2815 | static inline void ext4_inode_block_unlocked_dio(struct inode *inode) | 2825 | static inline void ext4_inode_block_unlocked_dio(struct inode *inode) |
2816 | { | 2826 | { |
2817 | ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | 2827 | ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); |
2818 | smp_mb(); | 2828 | smp_mb(); |
2819 | } | 2829 | } |
2820 | static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) | 2830 | static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) |
2821 | { | 2831 | { |
2822 | smp_mb(); | 2832 | smp_mb(); |
2823 | ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); | 2833 | ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK); |
2824 | } | 2834 | } |
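These two helpers bracket updates that must not race with lockless DIO readers; the smp_mb() orders the flag change against the readers' check of EXT4_STATE_DIOREAD_LOCK. A sketch of the typical calling pattern (the function name is invented; inode_dio_wait() is the generic VFS drain):

static void update_with_dio_drained(struct inode *inode)
{
	ext4_inode_block_unlocked_dio(inode);	/* new readers take the lock */
	inode_dio_wait(inode);			/* drain in-flight direct I/O */

	/* ... change state that lockless DIO readers must not observe ... */

	ext4_inode_resume_unlocked_dio(inode);	/* fast path allowed again */
}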
2825 | 2835 | ||
2826 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 2836 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
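in_range() tests membership in the closed interval [first, first + len - 1] (so it assumes len >= 1). A quick self-contained check of the boundary behaviour:

#include <assert.h>

#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)

int main(void)
{
	assert(in_range(12, 10, 3));	/* 3 blocks from 10 cover 10..12 */
	assert(!in_range(13, 10, 3));	/* one past the end of the range */
	return 0;
}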
2827 | 2837 | ||
2828 | /* For ioend & aio unwritten conversion wait queues */ | 2838 | /* For ioend & aio unwritten conversion wait queues */ |
2829 | #define EXT4_WQ_HASH_SZ 37 | 2839 | #define EXT4_WQ_HASH_SZ 37 |
2830 | #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ | 2840 | #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ |
2831 | EXT4_WQ_HASH_SZ]) | 2841 | EXT4_WQ_HASH_SZ]) |
2832 | #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ | 2842 | #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ |
2833 | EXT4_WQ_HASH_SZ]) | 2843 | EXT4_WQ_HASH_SZ]) |
2834 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | 2844 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; |
2835 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | 2845 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; |
2836 | 2846 | ||
2837 | #define EXT4_RESIZING 0 | 2847 | #define EXT4_RESIZING 0 |
2838 | extern int ext4_resize_begin(struct super_block *sb); | 2848 | extern int ext4_resize_begin(struct super_block *sb); |
2839 | extern void ext4_resize_end(struct super_block *sb); | 2849 | extern void ext4_resize_end(struct super_block *sb); |
2840 | 2850 | ||
2841 | #endif /* __KERNEL__ */ | 2851 | #endif /* __KERNEL__ */ |
2842 | 2852 | ||
2843 | #endif /* _EXT4_H */ | 2853 | #endif /* _EXT4_H */ |
2844 | 2854 |
fs/ext4/indirect.c
1 | /* | 1 | /* |
2 | * linux/fs/ext4/indirect.c | 2 | * linux/fs/ext4/indirect.c |
3 | * | 3 | * |
4 | * from | 4 | * from |
5 | * | 5 | * |
6 | * linux/fs/ext4/inode.c | 6 | * linux/fs/ext4/inode.c |
7 | * | 7 | * |
8 | * Copyright (C) 1992, 1993, 1994, 1995 | 8 | * Copyright (C) 1992, 1993, 1994, 1995 |
9 | * Remy Card (card@masi.ibp.fr) | 9 | * Remy Card (card@masi.ibp.fr) |
10 | * Laboratoire MASI - Institut Blaise Pascal | 10 | * Laboratoire MASI - Institut Blaise Pascal |
11 | * Universite Pierre et Marie Curie (Paris VI) | 11 | * Universite Pierre et Marie Curie (Paris VI) |
12 | * | 12 | * |
13 | * from | 13 | * from |
14 | * | 14 | * |
15 | * linux/fs/minix/inode.c | 15 | * linux/fs/minix/inode.c |
16 | * | 16 | * |
17 | * Copyright (C) 1991, 1992 Linus Torvalds | 17 | * Copyright (C) 1991, 1992 Linus Torvalds |
18 | * | 18 | * |
19 | * Goal-directed block allocation by Stephen Tweedie | 19 | * Goal-directed block allocation by Stephen Tweedie |
20 | * (sct@redhat.com), 1993, 1998 | 20 | * (sct@redhat.com), 1993, 1998 |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/aio.h> | 23 | #include <linux/aio.h> |
24 | #include "ext4_jbd2.h" | 24 | #include "ext4_jbd2.h" |
25 | #include "truncate.h" | 25 | #include "truncate.h" |
26 | 26 | ||
27 | #include <trace/events/ext4.h> | 27 | #include <trace/events/ext4.h> |
28 | 28 | ||
29 | typedef struct { | 29 | typedef struct { |
30 | __le32 *p; | 30 | __le32 *p; |
31 | __le32 key; | 31 | __le32 key; |
32 | struct buffer_head *bh; | 32 | struct buffer_head *bh; |
33 | } Indirect; | 33 | } Indirect; |
34 | 34 | ||
35 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | 35 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) |
36 | { | 36 | { |
37 | p->key = *(p->p = v); | 37 | p->key = *(p->p = v); |
38 | p->bh = bh; | 38 | p->bh = bh; |
39 | } | 39 | } |
40 | 40 | ||
41 | /** | 41 | /** |
42 | * ext4_block_to_path - parse the block number into array of offsets | 42 | * ext4_block_to_path - parse the block number into array of offsets |
43 | * @inode: inode in question (we are only interested in its superblock) | 43 | * @inode: inode in question (we are only interested in its superblock) |
44 | * @i_block: block number to be parsed | 44 | * @i_block: block number to be parsed |
45 | * @offsets: array to store the offsets in | 45 | * @offsets: array to store the offsets in |
46 | * @boundary: set this non-zero if the referred-to block is likely to be | 46 | * @boundary: set this non-zero if the referred-to block is likely to be |
47 | * followed (on disk) by an indirect block. | 47 | * followed (on disk) by an indirect block. |
48 | * | 48 | * |
49 | * To store the locations of a file's data, ext4 uses a data structure | 49 | * To store the locations of a file's data, ext4 uses a data structure |
50 | * common to UNIX filesystems - a tree of pointers anchored in the inode, | 50 | * common to UNIX filesystems - a tree of pointers anchored in the inode, |
51 | * with data blocks at the leaves and indirect blocks in intermediate nodes. | 51 | * with data blocks at the leaves and indirect blocks in intermediate nodes. |
52 | * This function translates the block number into a path in that tree - the | 52 | * This function translates the block number into a path in that tree - the |
53 | * return value is the path length and @offsets[n] is the offset of the | 53 | * return value is the path length and @offsets[n] is the offset of the |
54 | * pointer to the (n+1)th node in the nth one. If @i_block is out of range | 54 | * pointer to the (n+1)th node in the nth one. If @i_block is out of range |
55 | * (negative or too large), a warning is printed and zero is returned. | 55 | * (negative or too large), a warning is printed and zero is returned. |
56 | * | 56 | * |
57 | * Note: function doesn't find node addresses, so no IO is needed. All | 57 | * Note: function doesn't find node addresses, so no IO is needed. All |
58 | * we need to know is the capacity of indirect blocks (taken from the | 58 | * we need to know is the capacity of indirect blocks (taken from the |
59 | * inode->i_sb). | 59 | * inode->i_sb). |
60 | */ | 60 | */ |
61 | 61 | ||
62 | /* | 62 | /* |
63 | * Portability note: the last comparison (check that we fit into triple | 63 | * Portability note: the last comparison (check that we fit into triple |
64 | * indirect block) is spelled differently, because otherwise on an | 64 | * indirect block) is spelled differently, because otherwise on an |
65 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | 65 | * architecture with 32-bit longs and 8Kb pages we might get into trouble |
66 | * if our filesystem had 8Kb blocks. We might use long long, but that would | 66 | * if our filesystem had 8Kb blocks. We might use long long, but that would |
67 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | 67 | * kill us on x86. Oh, well, at least the sign propagation does not matter - |
68 | * i_block would have to be negative in the very beginning, so we would not | 68 | * i_block would have to be negative in the very beginning, so we would not |
69 | * get there at all. | 69 | * get there at all. |
70 | */ | 70 | */ |
71 | 71 | ||
72 | static int ext4_block_to_path(struct inode *inode, | 72 | static int ext4_block_to_path(struct inode *inode, |
73 | ext4_lblk_t i_block, | 73 | ext4_lblk_t i_block, |
74 | ext4_lblk_t offsets[4], int *boundary) | 74 | ext4_lblk_t offsets[4], int *boundary) |
75 | { | 75 | { |
76 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 76 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
77 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | 77 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); |
78 | const long direct_blocks = EXT4_NDIR_BLOCKS, | 78 | const long direct_blocks = EXT4_NDIR_BLOCKS, |
79 | indirect_blocks = ptrs, | 79 | indirect_blocks = ptrs, |
80 | double_blocks = (1 << (ptrs_bits * 2)); | 80 | double_blocks = (1 << (ptrs_bits * 2)); |
81 | int n = 0; | 81 | int n = 0; |
82 | int final = 0; | 82 | int final = 0; |
83 | 83 | ||
84 | if (i_block < direct_blocks) { | 84 | if (i_block < direct_blocks) { |
85 | offsets[n++] = i_block; | 85 | offsets[n++] = i_block; |
86 | final = direct_blocks; | 86 | final = direct_blocks; |
87 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 87 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
88 | offsets[n++] = EXT4_IND_BLOCK; | 88 | offsets[n++] = EXT4_IND_BLOCK; |
89 | offsets[n++] = i_block; | 89 | offsets[n++] = i_block; |
90 | final = ptrs; | 90 | final = ptrs; |
91 | } else if ((i_block -= indirect_blocks) < double_blocks) { | 91 | } else if ((i_block -= indirect_blocks) < double_blocks) { |
92 | offsets[n++] = EXT4_DIND_BLOCK; | 92 | offsets[n++] = EXT4_DIND_BLOCK; |
93 | offsets[n++] = i_block >> ptrs_bits; | 93 | offsets[n++] = i_block >> ptrs_bits; |
94 | offsets[n++] = i_block & (ptrs - 1); | 94 | offsets[n++] = i_block & (ptrs - 1); |
95 | final = ptrs; | 95 | final = ptrs; |
96 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | 96 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { |
97 | offsets[n++] = EXT4_TIND_BLOCK; | 97 | offsets[n++] = EXT4_TIND_BLOCK; |
98 | offsets[n++] = i_block >> (ptrs_bits * 2); | 98 | offsets[n++] = i_block >> (ptrs_bits * 2); |
99 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | 99 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); |
100 | offsets[n++] = i_block & (ptrs - 1); | 100 | offsets[n++] = i_block & (ptrs - 1); |
101 | final = ptrs; | 101 | final = ptrs; |
102 | } else { | 102 | } else { |
103 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", | 103 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", |
104 | i_block + direct_blocks + | 104 | i_block + direct_blocks + |
105 | indirect_blocks + double_blocks, inode->i_ino); | 105 | indirect_blocks + double_blocks, inode->i_ino); |
106 | } | 106 | } |
107 | if (boundary) | 107 | if (boundary) |
108 | *boundary = final - 1 - (i_block & (ptrs - 1)); | 108 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
109 | return n; | 109 | return n; |
110 | } | 110 | } |
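Concretely, with 4 KiB blocks an indirect block holds 1024 pointers (ptrs_bits == 10), so on top of the 12 direct slots the single-indirect range covers logical blocks 12-1035 and the double-indirect range starts at 1036. A worked example for logical block 5000 under those assumptions:

#include <stdio.h>

/* i_block = 5000 with 12 direct slots and 1024 pointers per block;
 * slot 13 of i_data[] stands in for EXT4_DIND_BLOCK. */
int main(void)
{
	unsigned int b = 5000;

	b -= 12;	/* past the direct slots          -> 4988 */
	b -= 1024;	/* past the single-indirect range -> 3964 */
	printf("path: i_data[13] -> entry %u -> entry %u\n",
	       b >> 10, b & 1023);	/* entry 3, then entry 892 */
	return 0;
}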
111 | 111 | ||
112 | /** | 112 | /** |
113 | * ext4_get_branch - read the chain of indirect blocks leading to data | 113 | * ext4_get_branch - read the chain of indirect blocks leading to data |
114 | * @inode: inode in question | 114 | * @inode: inode in question |
115 | * @depth: depth of the chain (1 - direct pointer, etc.) | 115 | * @depth: depth of the chain (1 - direct pointer, etc.) |
116 | * @offsets: offsets of pointers in inode/indirect blocks | 116 | * @offsets: offsets of pointers in inode/indirect blocks |
117 | * @chain: place to store the result | 117 | * @chain: place to store the result |
118 | * @err: here we store the error value | 118 | * @err: here we store the error value |
119 | * | 119 | * |
120 | * Function fills the array of triples <key, p, bh> and returns %NULL | 120 | * Function fills the array of triples <key, p, bh> and returns %NULL |
121 | * if everything went OK or the pointer to the last filled triple | 121 | * if everything went OK or the pointer to the last filled triple |
122 | * (the incomplete one) otherwise. Upon return, chain[i].key contains | 122 | * (the incomplete one) otherwise. Upon return, chain[i].key contains |
123 | * the number of (i+1)-th block in the chain (as it is stored in memory, | 123 | * the number of (i+1)-th block in the chain (as it is stored in memory, |
124 | * i.e. little-endian 32-bit), chain[i].p contains the address of that | 124 | * i.e. little-endian 32-bit), chain[i].p contains the address of that |
125 | * number (it points into struct inode for i==0 and into the bh->b_data | 125 | * number (it points into struct inode for i==0 and into the bh->b_data |
126 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect | 126 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect |
127 | * block for i>0 and NULL for i==0. In other words, it holds the block | 127 | * block for i>0 and NULL for i==0. In other words, it holds the block |
128 | * numbers of the chain, addresses they were taken from (and where we can | 128 | * numbers of the chain, addresses they were taken from (and where we can |
129 | * verify that chain did not change) and buffer_heads hosting these | 129 | * verify that chain did not change) and buffer_heads hosting these |
130 | * numbers. | 130 | * numbers. |
131 | * | 131 | * |
132 | * Function stops when it stumbles upon zero pointer (absent block) | 132 | * Function stops when it stumbles upon zero pointer (absent block) |
133 | * (pointer to last triple returned, *@err == 0) | 133 | * (pointer to last triple returned, *@err == 0) |
134 | * or when it gets an IO error reading an indirect block | 134 | * or when it gets an IO error reading an indirect block |
135 | * (ditto, *@err == -EIO) | 135 | * (ditto, *@err == -EIO) |
136 | * or when it reads all @depth-1 indirect blocks successfully and finds | 136 | * or when it reads all @depth-1 indirect blocks successfully and finds |
137 | * the whole chain, all the way to the data (returns %NULL, *err == 0). | 137 | * the whole chain, all the way to the data (returns %NULL, *err == 0). |
138 | * | 138 | * |
139 | * Needs to be called with | 139 | * Needs to be called with |
140 | * down_read(&EXT4_I(inode)->i_data_sem) | 140 | * down_read(&EXT4_I(inode)->i_data_sem) |
141 | */ | 141 | */ |
142 | static Indirect *ext4_get_branch(struct inode *inode, int depth, | 142 | static Indirect *ext4_get_branch(struct inode *inode, int depth, |
143 | ext4_lblk_t *offsets, | 143 | ext4_lblk_t *offsets, |
144 | Indirect chain[4], int *err) | 144 | Indirect chain[4], int *err) |
145 | { | 145 | { |
146 | struct super_block *sb = inode->i_sb; | 146 | struct super_block *sb = inode->i_sb; |
147 | Indirect *p = chain; | 147 | Indirect *p = chain; |
148 | struct buffer_head *bh; | 148 | struct buffer_head *bh; |
149 | int ret = -EIO; | 149 | int ret = -EIO; |
150 | 150 | ||
151 | *err = 0; | 151 | *err = 0; |
152 | /* i_data is not going away, no lock needed */ | 152 | /* i_data is not going away, no lock needed */ |
153 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); | 153 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); |
154 | if (!p->key) | 154 | if (!p->key) |
155 | goto no_block; | 155 | goto no_block; |
156 | while (--depth) { | 156 | while (--depth) { |
157 | bh = sb_getblk(sb, le32_to_cpu(p->key)); | 157 | bh = sb_getblk(sb, le32_to_cpu(p->key)); |
158 | if (unlikely(!bh)) { | 158 | if (unlikely(!bh)) { |
159 | ret = -ENOMEM; | 159 | ret = -ENOMEM; |
160 | goto failure; | 160 | goto failure; |
161 | } | 161 | } |
162 | 162 | ||
163 | if (!bh_uptodate_or_lock(bh)) { | 163 | if (!bh_uptodate_or_lock(bh)) { |
164 | if (bh_submit_read(bh) < 0) { | 164 | if (bh_submit_read(bh) < 0) { |
165 | put_bh(bh); | 165 | put_bh(bh); |
166 | goto failure; | 166 | goto failure; |
167 | } | 167 | } |
168 | /* validate block references */ | 168 | /* validate block references */ |
169 | if (ext4_check_indirect_blockref(inode, bh)) { | 169 | if (ext4_check_indirect_blockref(inode, bh)) { |
170 | put_bh(bh); | 170 | put_bh(bh); |
171 | goto failure; | 171 | goto failure; |
172 | } | 172 | } |
173 | } | 173 | } |
174 | 174 | ||
175 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); | 175 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
176 | /* Reader: end */ | 176 | /* Reader: end */ |
177 | if (!p->key) | 177 | if (!p->key) |
178 | goto no_block; | 178 | goto no_block; |
179 | } | 179 | } |
180 | return NULL; | 180 | return NULL; |
181 | 181 | ||
182 | failure: | 182 | failure: |
183 | *err = ret; | 183 | *err = ret; |
184 | no_block: | 184 | no_block: |
185 | return p; | 185 | return p; |
186 | } | 186 | } |
187 | 187 | ||
188 | /** | 188 | /** |
189 | * ext4_find_near - find a place for allocation with sufficient locality | 189 | * ext4_find_near - find a place for allocation with sufficient locality |
190 | * @inode: owner | 190 | * @inode: owner |
191 | * @ind: descriptor of indirect block. | 191 | * @ind: descriptor of indirect block. |
192 | * | 192 | * |
193 | * This function returns the preferred place for block allocation. | 193 | * This function returns the preferred place for block allocation. |
194 | * It is used when the heuristic for sequential allocation fails. | 194 | * It is used when the heuristic for sequential allocation fails. |
195 | * Rules are: | 195 | * Rules are: |
196 | * + if there is a block to the left of our position - allocate near it. | 196 | * + if there is a block to the left of our position - allocate near it. |
197 | * + if pointer will live in indirect block - allocate near that block. | 197 | * + if pointer will live in indirect block - allocate near that block. |
198 | * + if pointer will live in inode - allocate in the same | 198 | * + if pointer will live in inode - allocate in the same |
199 | * cylinder group. | 199 | * cylinder group. |
200 | * | 200 | * |
201 | * In the latter case we colour the starting block by the caller's PID to | 201 | * In the latter case we colour the starting block by the caller's PID to |
202 | * prevent it from clashing with concurrent allocations for a different inode | 202 | * prevent it from clashing with concurrent allocations for a different inode |
203 | * in the same block group. The PID is used here so that functionally related | 203 | * in the same block group. The PID is used here so that functionally related |
204 | * files will be close-by on-disk. | 204 | * files will be close-by on-disk. |
205 | * | 205 | * |
206 | * Caller must make sure that @ind is valid and will stay that way. | 206 | * Caller must make sure that @ind is valid and will stay that way. |
207 | */ | 207 | */ |
208 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 208 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
209 | { | 209 | { |
210 | struct ext4_inode_info *ei = EXT4_I(inode); | 210 | struct ext4_inode_info *ei = EXT4_I(inode); |
211 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | 211 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
212 | __le32 *p; | 212 | __le32 *p; |
213 | 213 | ||
214 | /* Try to find previous block */ | 214 | /* Try to find previous block */ |
215 | for (p = ind->p - 1; p >= start; p--) { | 215 | for (p = ind->p - 1; p >= start; p--) { |
216 | if (*p) | 216 | if (*p) |
217 | return le32_to_cpu(*p); | 217 | return le32_to_cpu(*p); |
218 | } | 218 | } |
219 | 219 | ||
220 | /* No such thing, so let's try location of indirect block */ | 220 | /* No such thing, so let's try location of indirect block */ |
221 | if (ind->bh) | 221 | if (ind->bh) |
222 | return ind->bh->b_blocknr; | 222 | return ind->bh->b_blocknr; |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * It is going to be referred to from the inode itself? OK, just put it | 225 | * It is going to be referred to from the inode itself? OK, just put it |
226 | * into the same cylinder group then. | 226 | * into the same cylinder group then. |
227 | */ | 227 | */ |
228 | return ext4_inode_to_goal_block(inode); | 228 | return ext4_inode_to_goal_block(inode); |
229 | } | 229 | } |
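The first rule is just a backward scan over the pointer array to the left of the slot being filled. A standalone sketch of that scan (types simplified to host integers; the fallbacks to the indirect block's own location and to the inode goal are left to the caller):

#include <stdint.h>

/* Returns the nearest allocated block to the left of *slot, or 0 when
 * every entry from start up to slot is still unallocated. */
static uint32_t nearest_left_block(const uint32_t *start,
				   const uint32_t *slot)
{
	const uint32_t *p;

	for (p = slot - 1; p >= start; p--)
		if (*p)
			return *p;	/* allocate near this block */
	return 0;	/* caller falls back: indirect block, then goal */
}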
230 | 230 | ||
231 | /** | 231 | /** |
232 | * ext4_find_goal - find a preferred place for allocation. | 232 | * ext4_find_goal - find a preferred place for allocation. |
233 | * @inode: owner | 233 | * @inode: owner |
234 | * @block: block we want | 234 | * @block: block we want |
235 | * @partial: pointer to the last triple within a chain | 235 | * @partial: pointer to the last triple within a chain |
236 | * | 236 | * |
237 | * Normally this function finds the preferred place for block allocation | 237 | * Normally this function finds the preferred place for block allocation |
238 | * and returns it. | 238 | * and returns it. |
239 | * Because this is only used for non-extent files, we limit the block nr | 239 | * Because this is only used for non-extent files, we limit the block nr |
240 | * to 32 bits. | 240 | * to 32 bits. |
241 | */ | 241 | */ |
242 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 242 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
243 | Indirect *partial) | 243 | Indirect *partial) |
244 | { | 244 | { |
245 | ext4_fsblk_t goal; | 245 | ext4_fsblk_t goal; |
246 | 246 | ||
247 | /* | 247 | /* |
248 | * XXX need to get goal block from mballoc's data structures | 248 | * XXX need to get goal block from mballoc's data structures |
249 | */ | 249 | */ |
250 | 250 | ||
251 | goal = ext4_find_near(inode, partial); | 251 | goal = ext4_find_near(inode, partial); |
252 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 252 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
253 | return goal; | 253 | return goal; |
254 | } | 254 | } |
255 | 255 | ||
256 | /** | 256 | /** |
257 | * ext4_blks_to_allocate - Look up the block map and count the number | 257 | * ext4_blks_to_allocate - Look up the block map and count the number |
258 | * of direct blocks that need to be allocated for the given branch. | 258 | * of direct blocks that need to be allocated for the given branch. |
259 | * | 259 | * |
260 | * @branch: chain of indirect blocks | 260 | * @branch: chain of indirect blocks |
261 | * @k: number of blocks needed for indirect blocks | 261 | * @k: number of blocks needed for indirect blocks |
262 | * @blks: number of data blocks to be mapped. | 262 | * @blks: number of data blocks to be mapped. |
263 | * @blocks_to_boundary: the offset in the indirect block | 263 | * @blocks_to_boundary: the offset in the indirect block |
264 | * | 264 | * |
265 | * Return the total number of blocks to be allocated, including the | 265 | * Return the total number of blocks to be allocated, including the |
266 | * direct and indirect blocks. | 266 | * direct and indirect blocks. |
267 | */ | 267 | */ |
268 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | 268 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, |
269 | int blocks_to_boundary) | 269 | int blocks_to_boundary) |
270 | { | 270 | { |
271 | unsigned int count = 0; | 271 | unsigned int count = 0; |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * Simple case: if the [t,d]indirect block(s) have not been allocated | 274 | * Simple case: if the [t,d]indirect block(s) have not been allocated |
275 | * yet, then clearly the blocks on that path have not been allocated either. | 275 | * yet, then clearly the blocks on that path have not been allocated either. |
276 | */ | 276 | */ |
277 | if (k > 0) { | 277 | if (k > 0) { |
278 | /* right now we don't handle cross boundary allocation */ | 278 | /* right now we don't handle cross boundary allocation */ |
279 | if (blks < blocks_to_boundary + 1) | 279 | if (blks < blocks_to_boundary + 1) |
280 | count += blks; | 280 | count += blks; |
281 | else | 281 | else |
282 | count += blocks_to_boundary + 1; | 282 | count += blocks_to_boundary + 1; |
283 | return count; | 283 | return count; |
284 | } | 284 | } |
285 | 285 | ||
286 | count++; | 286 | count++; |
287 | while (count < blks && count <= blocks_to_boundary && | 287 | while (count < blks && count <= blocks_to_boundary && |
288 | le32_to_cpu(*(branch[0].p + count)) == 0) { | 288 | le32_to_cpu(*(branch[0].p + count)) == 0) { |
289 | count++; | 289 | count++; |
290 | } | 290 | } |
291 | return count; | 291 | return count; |
292 | } | 292 | } |
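The k > 0 branch clamps the request so one pass never allocates across an indirect-block boundary. A small self-check of that clamp with assumed inputs:

#include <assert.h>

/* Same clamp as the k > 0 case above: at most blocks_to_boundary + 1
 * data blocks are counted in one round. */
static unsigned int clamp_to_boundary(unsigned int blks,
				      int blocks_to_boundary)
{
	unsigned int limit = (unsigned int)blocks_to_boundary + 1;

	return blks < limit ? blks : limit;
}

int main(void)
{
	assert(clamp_to_boundary(8, 3) == 4);	/* boundary 3 slots away */
	assert(clamp_to_boundary(2, 3) == 2);	/* request fits entirely */
	return 0;
}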
293 | 293 | ||
294 | /** | 294 | /** |
295 | * ext4_alloc_branch - allocate and set up a chain of blocks. | 295 | * ext4_alloc_branch - allocate and set up a chain of blocks. |
296 | * @handle: handle for this transaction | 296 | * @handle: handle for this transaction |
297 | * @inode: owner | 297 | * @inode: owner |
298 | * @indirect_blks: number of allocated indirect blocks | 298 | * @indirect_blks: number of allocated indirect blocks |
299 | * @blks: number of allocated direct blocks | 299 | * @blks: number of allocated direct blocks |
300 | * @goal: preferred place for allocation | 300 | * @goal: preferred place for allocation |
301 | * @offsets: offsets (in the blocks) to store the pointers to next. | 301 | * @offsets: offsets (in the blocks) to store the pointers to next. |
302 | * @branch: place to store the chain in. | 302 | * @branch: place to store the chain in. |
303 | * | 303 | * |
304 | * This function allocates blocks, zeroes out all but the last one, | 304 | * This function allocates blocks, zeroes out all but the last one, |
305 | * links them into a chain and (if we are synchronous) writes them to disk. | 305 | * links them into a chain and (if we are synchronous) writes them to disk. |
306 | * In other words, it prepares a branch that can be spliced onto the | 306 | * In other words, it prepares a branch that can be spliced onto the |
307 | * inode. It stores the information about that chain in the branch[], in | 307 | * inode. It stores the information about that chain in the branch[], in |
308 | * the same format as ext4_get_branch() would do. We call it after we | 308 | * the same format as ext4_get_branch() would do. We call it after we |
309 | * have read the existing part of the chain; partial points to the last | 309 | * have read the existing part of the chain; partial points to the last |
310 | * triple of that (the one with a zero ->key). Upon exit we have the same | 310 | * triple of that (the one with a zero ->key). Upon exit we have the same |
311 | * picture as after the successful ext4_get_block(), except that in one | 311 | * picture as after the successful ext4_get_block(), except that in one |
312 | * place chain is disconnected - *branch->p is still zero (we did not | 312 | * place chain is disconnected - *branch->p is still zero (we did not |
313 | * set the last link), but branch->key contains the number that should | 313 | * set the last link), but branch->key contains the number that should |
314 | * be placed into *branch->p to fill that gap. | 314 | * be placed into *branch->p to fill that gap. |
315 | * | 315 | * |
316 | * If allocation fails we free all blocks we've allocated (and forget | 316 | * If allocation fails we free all blocks we've allocated (and forget |
317 | * their buffer_heads) and return the error value from the failed | 317 | * their buffer_heads) and return the error value from the failed |
318 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain | 318 | * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain |
319 | * as described above and return 0. | 319 | * as described above and return 0. |
320 | */ | 320 | */ |
321 | static int ext4_alloc_branch(handle_t *handle, | 321 | static int ext4_alloc_branch(handle_t *handle, |
322 | struct ext4_allocation_request *ar, | 322 | struct ext4_allocation_request *ar, |
323 | int indirect_blks, ext4_lblk_t *offsets, | 323 | int indirect_blks, ext4_lblk_t *offsets, |
324 | Indirect *branch) | 324 | Indirect *branch) |
325 | { | 325 | { |
326 | struct buffer_head * bh; | 326 | struct buffer_head * bh; |
327 | ext4_fsblk_t b, new_blocks[4]; | 327 | ext4_fsblk_t b, new_blocks[4]; |
328 | __le32 *p; | 328 | __le32 *p; |
329 | int i, j, err, len = 1; | 329 | int i, j, err, len = 1; |
330 | 330 | ||
331 | for (i = 0; i <= indirect_blks; i++) { | 331 | for (i = 0; i <= indirect_blks; i++) { |
332 | if (i == indirect_blks) { | 332 | if (i == indirect_blks) { |
333 | new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); | 333 | new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); |
334 | } else | 334 | } else |
335 | ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, | 335 | ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, |
336 | ar->inode, ar->goal, | 336 | ar->inode, ar->goal, |
337 | ar->flags & EXT4_MB_DELALLOC_RESERVED, | 337 | ar->flags & EXT4_MB_DELALLOC_RESERVED, |
338 | NULL, &err); | 338 | NULL, &err); |
339 | if (err) { | 339 | if (err) { |
340 | i--; | 340 | i--; |
341 | goto failed; | 341 | goto failed; |
342 | } | 342 | } |
343 | branch[i].key = cpu_to_le32(new_blocks[i]); | 343 | branch[i].key = cpu_to_le32(new_blocks[i]); |
344 | if (i == 0) | 344 | if (i == 0) |
345 | continue; | 345 | continue; |
346 | 346 | ||
347 | bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); | 347 | bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); |
348 | if (unlikely(!bh)) { | 348 | if (unlikely(!bh)) { |
349 | err = -ENOMEM; | 349 | err = -ENOMEM; |
350 | goto failed; | 350 | goto failed; |
351 | } | 351 | } |
352 | lock_buffer(bh); | 352 | lock_buffer(bh); |
353 | BUFFER_TRACE(bh, "call get_create_access"); | 353 | BUFFER_TRACE(bh, "call get_create_access"); |
354 | err = ext4_journal_get_create_access(handle, bh); | 354 | err = ext4_journal_get_create_access(handle, bh); |
355 | if (err) { | 355 | if (err) { |
356 | unlock_buffer(bh); | 356 | unlock_buffer(bh); |
357 | goto failed; | 357 | goto failed; |
358 | } | 358 | } |
359 | 359 | ||
360 | memset(bh->b_data, 0, bh->b_size); | 360 | memset(bh->b_data, 0, bh->b_size); |
361 | p = branch[i].p = (__le32 *) bh->b_data + offsets[i]; | 361 | p = branch[i].p = (__le32 *) bh->b_data + offsets[i]; |
362 | b = new_blocks[i]; | 362 | b = new_blocks[i]; |
363 | 363 | ||
364 | if (i == indirect_blks) | 364 | if (i == indirect_blks) |
365 | len = ar->len; | 365 | len = ar->len; |
366 | for (j = 0; j < len; j++) | 366 | for (j = 0; j < len; j++) |
367 | *p++ = cpu_to_le32(b++); | 367 | *p++ = cpu_to_le32(b++); |
368 | 368 | ||
369 | BUFFER_TRACE(bh, "marking uptodate"); | 369 | BUFFER_TRACE(bh, "marking uptodate"); |
370 | set_buffer_uptodate(bh); | 370 | set_buffer_uptodate(bh); |
371 | unlock_buffer(bh); | 371 | unlock_buffer(bh); |
372 | 372 | ||
373 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 373 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
374 | err = ext4_handle_dirty_metadata(handle, ar->inode, bh); | 374 | err = ext4_handle_dirty_metadata(handle, ar->inode, bh); |
375 | if (err) | 375 | if (err) |
376 | goto failed; | 376 | goto failed; |
377 | } | 377 | } |
378 | return 0; | 378 | return 0; |
379 | failed: | 379 | failed: |
380 | for (; i >= 0; i--) { | 380 | for (; i >= 0; i--) { |
381 | /* | 381 | /* |
382 | * We want to ext4_forget() only freshly allocated indirect | 382 | * We want to ext4_forget() only freshly allocated indirect |
383 | * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and | 383 | * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and |
384 | * buffer at branch[0].bh is indirect block / inode already | 384 | * buffer at branch[0].bh is indirect block / inode already |
385 | * existing before ext4_alloc_branch() was called. | 385 | * existing before ext4_alloc_branch() was called. |
386 | */ | 386 | */ |
387 | if (i > 0 && i != indirect_blks && branch[i].bh) | 387 | if (i > 0 && i != indirect_blks && branch[i].bh) |
388 | ext4_forget(handle, 1, ar->inode, branch[i].bh, | 388 | ext4_forget(handle, 1, ar->inode, branch[i].bh, |
389 | branch[i].bh->b_blocknr); | 389 | branch[i].bh->b_blocknr); |
390 | ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], | 390 | ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], |
391 | (i == indirect_blks) ? ar->len : 1, 0); | 391 | (i == indirect_blks) ? ar->len : 1, 0); |
392 | } | 392 | } |
393 | return err; | 393 | return err; |
394 | } | 394 | } |
395 | 395 | ||
396 | /** | 396 | /** |
397 | * ext4_splice_branch - splice the allocated branch onto inode. | 397 | * ext4_splice_branch - splice the allocated branch onto inode. |
398 | * @handle: handle for this transaction | 398 | * @handle: handle for this transaction |
399 | * @inode: owner | 399 | * @inode: owner |
400 | * @block: (logical) number of block we are adding | 400 | * @block: (logical) number of block we are adding |
401 | * @chain: chain of indirect blocks (with a missing link - see | 401 | * @chain: chain of indirect blocks (with a missing link - see |
402 | * ext4_alloc_branch) | 402 | * ext4_alloc_branch) |
403 | * @where: location of missing link | 403 | * @where: location of missing link |
404 | * @num: number of indirect blocks we are adding | 404 | * @num: number of indirect blocks we are adding |
405 | * @blks: number of direct blocks we are adding | 405 | * @blks: number of direct blocks we are adding |
406 | * | 406 | * |
407 | * This function fills the missing link and does all housekeeping needed in | 407 | * This function fills the missing link and does all housekeeping needed in |
408 | * inode (->i_blocks, etc.). In case of success we end up with the full | 408 | * inode (->i_blocks, etc.). In case of success we end up with the full |
409 | * chain to new block and return 0. | 409 | * chain to new block and return 0. |
410 | */ | 410 | */ |
411 | static int ext4_splice_branch(handle_t *handle, | 411 | static int ext4_splice_branch(handle_t *handle, |
412 | struct ext4_allocation_request *ar, | 412 | struct ext4_allocation_request *ar, |
413 | Indirect *where, int num) | 413 | Indirect *where, int num) |
414 | { | 414 | { |
415 | int i; | 415 | int i; |
416 | int err = 0; | 416 | int err = 0; |
417 | ext4_fsblk_t current_block; | 417 | ext4_fsblk_t current_block; |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * If we're splicing into a [td]indirect block (as opposed to the | 420 | * If we're splicing into a [td]indirect block (as opposed to the |
421 | * inode) then we need to get write access to the [td]indirect block | 421 | * inode) then we need to get write access to the [td]indirect block |
422 | * before the splice. | 422 | * before the splice. |
423 | */ | 423 | */ |
424 | if (where->bh) { | 424 | if (where->bh) { |
425 | BUFFER_TRACE(where->bh, "get_write_access"); | 425 | BUFFER_TRACE(where->bh, "get_write_access"); |
426 | err = ext4_journal_get_write_access(handle, where->bh); | 426 | err = ext4_journal_get_write_access(handle, where->bh); |
427 | if (err) | 427 | if (err) |
428 | goto err_out; | 428 | goto err_out; |
429 | } | 429 | } |
430 | /* That's it */ | 430 | /* That's it */ |
431 | 431 | ||
432 | *where->p = where->key; | 432 | *where->p = where->key; |
433 | 433 | ||
434 | /* | 434 | /* |
435 | * Update the host buffer_head or inode to point to the just-allocated | 435 | * Update the host buffer_head or inode to point to the just-allocated |
436 | * direct blocks. | 436 | * direct blocks. |
437 | */ | 437 | */ |
438 | if (num == 0 && ar->len > 1) { | 438 | if (num == 0 && ar->len > 1) { |
439 | current_block = le32_to_cpu(where->key) + 1; | 439 | current_block = le32_to_cpu(where->key) + 1; |
440 | for (i = 1; i < ar->len; i++) | 440 | for (i = 1; i < ar->len; i++) |
441 | *(where->p + i) = cpu_to_le32(current_block++); | 441 | *(where->p + i) = cpu_to_le32(current_block++); |
442 | } | 442 | } |
443 | 443 | ||
444 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 444 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
445 | /* had we spliced it onto indirect block? */ | 445 | /* had we spliced it onto indirect block? */ |
446 | if (where->bh) { | 446 | if (where->bh) { |
447 | /* | 447 | /* |
448 | * If we spliced it onto an indirect block, we haven't | 448 | * If we spliced it onto an indirect block, we haven't |
449 | * altered the inode. Note however that if it is being spliced | 449 | * altered the inode. Note however that if it is being spliced |
450 | * onto an indirect block at the very end of the file (the | 450 | * onto an indirect block at the very end of the file (the |
451 | * file is growing) then we *will* alter the inode to reflect | 451 | * file is growing) then we *will* alter the inode to reflect |
452 | * the new i_size. But that is not done here - it is done in | 452 | * the new i_size. But that is not done here - it is done in |
453 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. | 453 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. |
454 | */ | 454 | */ |
455 | jbd_debug(5, "splicing indirect only\n"); | 455 | jbd_debug(5, "splicing indirect only\n"); |
456 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | 456 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); |
457 | err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); | 457 | err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); |
458 | if (err) | 458 | if (err) |
459 | goto err_out; | 459 | goto err_out; |
460 | } else { | 460 | } else { |
461 | /* | 461 | /* |
462 | * OK, we spliced it into the inode itself on a direct block. | 462 | * OK, we spliced it into the inode itself on a direct block. |
463 | */ | 463 | */ |
464 | ext4_mark_inode_dirty(handle, ar->inode); | 464 | ext4_mark_inode_dirty(handle, ar->inode); |
465 | jbd_debug(5, "splicing direct\n"); | 465 | jbd_debug(5, "splicing direct\n"); |
466 | } | 466 | } |
467 | return err; | 467 | return err; |
468 | 468 | ||
469 | err_out: | 469 | err_out: |
470 | for (i = 1; i <= num; i++) { | 470 | for (i = 1; i <= num; i++) { |
471 | /* | 471 | /* |
472 | * branch[i].bh is newly allocated, so there is no | 472 | * branch[i].bh is newly allocated, so there is no |
473 | * need to revoke the block, which is why we don't | 473 | * need to revoke the block, which is why we don't |
474 | * need to set EXT4_FREE_BLOCKS_METADATA. | 474 | * need to set EXT4_FREE_BLOCKS_METADATA. |
475 | */ | 475 | */ |
476 | ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, | 476 | ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, |
477 | EXT4_FREE_BLOCKS_FORGET); | 477 | EXT4_FREE_BLOCKS_FORGET); |
478 | } | 478 | } |
479 | ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), | 479 | ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), |
480 | ar->len, 0); | 480 | ar->len, 0); |
481 | 481 | ||
482 | return err; | 482 | return err; |
483 | } | 483 | } |
484 | 484 | ||
485 | /* | 485 | /* |
486 | * The ext4_ind_map_blocks() function handles non-extents inodes | 486 | * The ext4_ind_map_blocks() function handles non-extents inodes |
487 | * (i.e., using the traditional indirect/double-indirect i_blocks | 487 | * (i.e., using the traditional indirect/double-indirect i_blocks |
488 | * scheme) for ext4_map_blocks(). | 488 | * scheme) for ext4_map_blocks(). |
489 | * | 489 | * |
490 | * Allocation strategy is simple: if we have to allocate something, we will | 490 | * Allocation strategy is simple: if we have to allocate something, we will |
491 | * have to go the whole way to the leaf. So let's do it before attaching anything | 491 | * have to go the whole way to the leaf. So let's do it before attaching anything |
492 | * to the tree, set linkage between the newborn blocks, write them if sync is | 492 | * to the tree, set linkage between the newborn blocks, write them if sync is |
493 | * required, recheck the path, free and repeat if check fails, otherwise | 493 | * required, recheck the path, free and repeat if check fails, otherwise |
494 | * set the last missing link (that will protect us from any truncate-generated | 494 | * set the last missing link (that will protect us from any truncate-generated |
495 | * removals - all blocks on the path are immune now) and possibly force the | 495 | * removals - all blocks on the path are immune now) and possibly force the |
496 | * write on the parent block. | 496 | * write on the parent block. |
497 | * That has a nice additional property: no special recovery from the failed | 497 | * That has a nice additional property: no special recovery from the failed |
498 | * allocations is needed - we simply release blocks and do not touch anything | 498 | * allocations is needed - we simply release blocks and do not touch anything |
499 | * reachable from inode. | 499 | * reachable from inode. |
500 | * | 500 | * |
501 | * `handle' can be NULL if create == 0. | 501 | * `handle' can be NULL if create == 0. |
502 | * | 502 | * |
503 | * return > 0, # of blocks mapped or allocated. | 503 | * return > 0, # of blocks mapped or allocated. |
504 | * return = 0, if plain lookup failed. | 504 | * return = 0, if plain lookup failed. |
505 | * return < 0, error case. | 505 | * return < 0, error case. |
506 | * | 506 | * |
507 | * The ext4_ind_map_blocks() function should be called with | 507 | * The ext4_ind_map_blocks() function should be called with |
508 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem | 508 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem |
509 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or | 509 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or |
510 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | 510 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
511 | * blocks. | 511 | * blocks. |
512 | */ | 512 | */ |
513 | int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 513 | int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
514 | struct ext4_map_blocks *map, | 514 | struct ext4_map_blocks *map, |
515 | int flags) | 515 | int flags) |
516 | { | 516 | { |
517 | struct ext4_allocation_request ar; | 517 | struct ext4_allocation_request ar; |
518 | int err = -EIO; | 518 | int err = -EIO; |
519 | ext4_lblk_t offsets[4]; | 519 | ext4_lblk_t offsets[4]; |
520 | Indirect chain[4]; | 520 | Indirect chain[4]; |
521 | Indirect *partial; | 521 | Indirect *partial; |
522 | int indirect_blks; | 522 | int indirect_blks; |
523 | int blocks_to_boundary = 0; | 523 | int blocks_to_boundary = 0; |
524 | int depth; | 524 | int depth; |
525 | int count = 0; | 525 | int count = 0; |
526 | ext4_fsblk_t first_block = 0; | 526 | ext4_fsblk_t first_block = 0; |
527 | 527 | ||
528 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 528 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
529 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | 529 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
530 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 530 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
531 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | 531 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
532 | &blocks_to_boundary); | 532 | &blocks_to_boundary); |
533 | 533 | ||
534 | if (depth == 0) | 534 | if (depth == 0) |
535 | goto out; | 535 | goto out; |
536 | 536 | ||
537 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); | 537 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); |
538 | 538 | ||
539 | /* Simplest case - block found, no allocation needed */ | 539 | /* Simplest case - block found, no allocation needed */ |
540 | if (!partial) { | 540 | if (!partial) { |
541 | first_block = le32_to_cpu(chain[depth - 1].key); | 541 | first_block = le32_to_cpu(chain[depth - 1].key); |
542 | count++; | 542 | count++; |
543 | /*map more blocks*/ | 543 | /*map more blocks*/ |
544 | while (count < map->m_len && count <= blocks_to_boundary) { | 544 | while (count < map->m_len && count <= blocks_to_boundary) { |
545 | ext4_fsblk_t blk; | 545 | ext4_fsblk_t blk; |
546 | 546 | ||
547 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 547 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
548 | 548 | ||
549 | if (blk == first_block + count) | 549 | if (blk == first_block + count) |
550 | count++; | 550 | count++; |
551 | else | 551 | else |
552 | break; | 552 | break; |
553 | } | 553 | } |
554 | goto got_it; | 554 | goto got_it; |
555 | } | 555 | } |
556 | 556 | ||
557 | /* Next simple case - plain lookup or failed read of indirect block */ | 557 | /* Next simple case - plain lookup or failed read of indirect block */ |
558 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) | 558 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) |
559 | goto cleanup; | 559 | goto cleanup; |
560 | 560 | ||
561 | /* | 561 | /* |
562 | * Okay, we need to do block allocation. | 562 | * Okay, we need to do block allocation. |
563 | */ | 563 | */ |
564 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 564 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, |
565 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 565 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
566 | EXT4_ERROR_INODE(inode, "Can't allocate blocks for " | 566 | EXT4_ERROR_INODE(inode, "Can't allocate blocks for " |
567 | "non-extent mapped inodes with bigalloc"); | 567 | "non-extent mapped inodes with bigalloc"); |
568 | return -ENOSPC; | 568 | return -ENOSPC; |
569 | } | 569 | } |
570 | 570 | ||
571 | /* Set up for the direct block allocation */ | 571 | /* Set up for the direct block allocation */ |
572 | memset(&ar, 0, sizeof(ar)); | 572 | memset(&ar, 0, sizeof(ar)); |
573 | ar.inode = inode; | 573 | ar.inode = inode; |
574 | ar.logical = map->m_lblk; | 574 | ar.logical = map->m_lblk; |
575 | if (S_ISREG(inode->i_mode)) | 575 | if (S_ISREG(inode->i_mode)) |
576 | ar.flags = EXT4_MB_HINT_DATA; | 576 | ar.flags = EXT4_MB_HINT_DATA; |
577 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 577 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
578 | ar.flags |= EXT4_MB_DELALLOC_RESERVED; | 578 | ar.flags |= EXT4_MB_DELALLOC_RESERVED; |
579 | 579 | ||
580 | ar.goal = ext4_find_goal(inode, map->m_lblk, partial); | 580 | ar.goal = ext4_find_goal(inode, map->m_lblk, partial); |
581 | 581 | ||
582 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 582 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
583 | indirect_blks = (chain + depth) - partial - 1; | 583 | indirect_blks = (chain + depth) - partial - 1; |
584 | 584 | ||
585 | /* | 585 | /* |
586 | * Next look up the indirect map to count the totoal number of | 586 | * Next look up the indirect map to count the totoal number of |
587 | * direct blocks to allocate for this branch. | 587 | * direct blocks to allocate for this branch. |
588 | */ | 588 | */ |
589 | ar.len = ext4_blks_to_allocate(partial, indirect_blks, | 589 | ar.len = ext4_blks_to_allocate(partial, indirect_blks, |
590 | map->m_len, blocks_to_boundary); | 590 | map->m_len, blocks_to_boundary); |
591 | 591 | ||
592 | /* | 592 | /* |
593 | * Block out ext4_truncate while we alter the tree | 593 | * Block out ext4_truncate while we alter the tree |
594 | */ | 594 | */ |
595 | err = ext4_alloc_branch(handle, &ar, indirect_blks, | 595 | err = ext4_alloc_branch(handle, &ar, indirect_blks, |
596 | offsets + (partial - chain), partial); | 596 | offsets + (partial - chain), partial); |
597 | 597 | ||
598 | /* | 598 | /* |
599 | * The ext4_splice_branch call will free and forget any buffers | 599 | * The ext4_splice_branch call will free and forget any buffers |
600 | * on the new chain if there is a failure, but that risks using | 600 | * on the new chain if there is a failure, but that risks using |
601 | * up transaction credits, especially for bitmaps where the | 601 | * up transaction credits, especially for bitmaps where the |
602 | * credits cannot be returned. Can we handle this somehow? We | 602 | * credits cannot be returned. Can we handle this somehow? We |
603 | * may need to return -EAGAIN upwards in the worst case. --sct | 603 | * may need to return -EAGAIN upwards in the worst case. --sct |
604 | */ | 604 | */ |
605 | if (!err) | 605 | if (!err) |
606 | err = ext4_splice_branch(handle, &ar, partial, indirect_blks); | 606 | err = ext4_splice_branch(handle, &ar, partial, indirect_blks); |
607 | if (err) | 607 | if (err) |
608 | goto cleanup; | 608 | goto cleanup; |
609 | 609 | ||
610 | map->m_flags |= EXT4_MAP_NEW; | 610 | map->m_flags |= EXT4_MAP_NEW; |
611 | 611 | ||
612 | ext4_update_inode_fsync_trans(handle, inode, 1); | 612 | ext4_update_inode_fsync_trans(handle, inode, 1); |
613 | count = ar.len; | 613 | count = ar.len; |
614 | got_it: | 614 | got_it: |
615 | map->m_flags |= EXT4_MAP_MAPPED; | 615 | map->m_flags |= EXT4_MAP_MAPPED; |
616 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | 616 | map->m_pblk = le32_to_cpu(chain[depth-1].key); |
617 | map->m_len = count; | 617 | map->m_len = count; |
618 | if (count > blocks_to_boundary) | 618 | if (count > blocks_to_boundary) |
619 | map->m_flags |= EXT4_MAP_BOUNDARY; | 619 | map->m_flags |= EXT4_MAP_BOUNDARY; |
620 | err = count; | 620 | err = count; |
621 | /* Clean up and exit */ | 621 | /* Clean up and exit */ |
622 | partial = chain + depth - 1; /* the whole chain */ | 622 | partial = chain + depth - 1; /* the whole chain */ |
623 | cleanup: | 623 | cleanup: |
624 | while (partial > chain) { | 624 | while (partial > chain) { |
625 | BUFFER_TRACE(partial->bh, "call brelse"); | 625 | BUFFER_TRACE(partial->bh, "call brelse"); |
626 | brelse(partial->bh); | 626 | brelse(partial->bh); |
627 | partial--; | 627 | partial--; |
628 | } | 628 | } |
629 | out: | 629 | out: |
630 | trace_ext4_ind_map_blocks_exit(inode, flags, map, err); | 630 | trace_ext4_ind_map_blocks_exit(inode, flags, map, err); |
631 | return err; | 631 | return err; |
632 | } | 632 | } |
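
/*
 * Illustrative caller sketch (not code from this file; it only restates
 * the locking rules documented above): a lookup-only mapping would look
 * roughly like
 *
 *	struct ext4_map_blocks map = { .m_lblk = lblk, .m_len = 1 };
 *
 *	down_read(&EXT4_I(inode)->i_data_sem);
 *	ret = ext4_ind_map_blocks(NULL, inode, &map, 0);
 *	up_read(&EXT4_I(inode)->i_data_sem);
 *
 * where ret > 0 means map.m_pblk holds the first mapped physical block
 * and ret == 0 means a hole.  An allocating call would instead pass a
 * journal handle, set EXT4_GET_BLOCKS_CREATE in flags, and hold
 * down_write(&EXT4_I(inode)->i_data_sem).
 */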

/*
 * O_DIRECT for ext3 (or indirect map) based files
 *
 * If the O_DIRECT write will extend the file then add this inode to the
 * orphan list. So recovery will truncate it back to the original size
 * if the machine crashes during the write.
 *
 * If the O_DIRECT write is instantiating holes inside i_size and the machine
 * crashes then stale disk data _may_ be exposed inside the file. But current
 * VFS code falls back to the buffered path in that case so we are safe.
 */
ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
			   struct iov_iter *iter, loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext4_inode_info *ei = EXT4_I(inode);
	handle_t *handle;
	ssize_t ret;
	int orphan = 0;
	size_t count = iov_iter_count(iter);
	int retries = 0;

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		if (final_size > inode->i_size) {
			/* Credits for sb + inode write */
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
			if (IS_ERR(handle)) {
				ret = PTR_ERR(handle);
				goto out;
			}
			ret = ext4_orphan_add(handle, inode);
			if (ret) {
				ext4_journal_stop(handle);
				goto out;
			}
			orphan = 1;
			ei->i_disksize = inode->i_size;
			ext4_journal_stop(handle);
		}
	}

retry:
	if (rw == READ && ext4_should_dioread_nolock(inode)) {
		/*
		 * Nolock dioread optimization may be dynamically disabled
		 * via ext4_inode_block_unlocked_dio(). Check inode's state
		 * while holding an extra i_dio_count ref.
		 */
		atomic_inc(&inode->i_dio_count);
		smp_mb();
		if (unlikely(ext4_test_inode_state(inode,
						   EXT4_STATE_DIOREAD_LOCK))) {
			inode_dio_done(inode);
			goto locked;
		}
		if (IS_DAX(inode))
			ret = dax_do_io(rw, iocb, inode, iter, offset,
					ext4_get_block, NULL, 0);
		else
			ret = __blockdev_direct_IO(rw, iocb, inode,
					inode->i_sb->s_bdev, iter, offset,
					ext4_get_block, NULL, NULL, 0);
		inode_dio_done(inode);
	} else {
locked:
		if (IS_DAX(inode))
			ret = dax_do_io(rw, iocb, inode, iter, offset,
					ext4_get_block, NULL, DIO_LOCKING);
		else
			ret = blockdev_direct_IO(rw, iocb, inode, iter,
						 offset, ext4_get_block);

		if (unlikely((rw & WRITE) && ret < 0)) {
			loff_t isize = i_size_read(inode);
			loff_t end = offset + count;

			if (end > isize)
				ext4_truncate_failed_write(inode);
		}
	}
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (orphan) {
		int err;

		/* Credits for sb + inode write */
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
		if (IS_ERR(handle)) {
			/* This is really bad luck. We've written the data
			 * but cannot extend i_size. Bail out and pretend
			 * the write failed... */
			ret = PTR_ERR(handle);
			if (inode->i_nlink)
				ext4_orphan_del(NULL, inode);

			goto out;
		}
		if (inode->i_nlink)
			ext4_orphan_del(handle, inode);
		if (ret > 0) {
			loff_t end = offset + ret;
			if (end > inode->i_size) {
				ei->i_disksize = end;
				i_size_write(inode, end);
				/*
				 * We're going to return a positive `ret'
				 * here due to non-zero-length I/O, so there's
				 * no way of reporting error returns from
				 * ext4_mark_inode_dirty() to userspace.  So
				 * ignore it.
				 */
				ext4_mark_inode_dirty(handle, inode);
			}
		}
		err = ext4_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	return ret;
}

/*
 * Calculate the number of metadata blocks that need to be reserved in
 * order to allocate a new block at @lblock for a non-extent-based file.
 */
int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
	int blk_bits;

	if (lblock < EXT4_NDIR_BLOCKS)
		return 0;

	lblock -= EXT4_NDIR_BLOCKS;

	if (ei->i_da_metadata_calc_len &&
	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
		ei->i_da_metadata_calc_len++;
		return 0;
	}
	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
	ei->i_da_metadata_calc_len = 1;
	blk_bits = order_base_2(lblock);
	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
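
/*
 * Worked example of the computation above (illustrative numbers,
 * assuming 4KiB blocks, so EXT4_ADDR_PER_BLOCK == 1024 and
 * EXT4_ADDR_PER_BLOCK_BITS == 10): for lblock == 5012 we get
 * 5012 - EXT4_NDIR_BLOCKS == 5000, order_base_2(5000) == 13, and
 * 13 / 10 + 1 == 2, i.e. reserve two metadata blocks (an indirect
 * block and a double-indirect block may both need allocating).
 * A following allocation mapped by the same indirect block matches
 * i_da_metadata_calc_last_lblock above and reserves nothing extra.
 */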

/*
 * Calculate the number of indirect blocks touched by mapping @nrblocks
 * logically contiguous blocks.
 */
int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
{
	/*
	 * With N contiguous data blocks, we need at most
	 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
	 * 2 dindirect blocks, and 1 tindirect block
	 */
	return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
}
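
/*
 * For instance (illustrative, assuming 4KiB blocks so
 * EXT4_ADDR_PER_BLOCK == 1024): mapping 2048 contiguous blocks costs
 * at most DIV_ROUND_UP(2048, 1024) + 4 == 6 metadata blocks - up to
 * three indirect blocks (the run may straddle an indirect-block
 * boundary, hence the "+ 1" above), two double-indirect blocks and
 * one triple-indirect block.
 */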

/*
 * Truncate transactions can be complex and absolutely huge. So we need to
 * be able to restart the transaction at a convenient checkpoint to make
 * sure we don't overflow the journal.
 *
 * Try to extend this transaction for the purposes of truncation. If
 * extend fails, we need to propagate the failure up and restart the
 * transaction in the top-level truncate loop. --sct
 *
 * Returns 0 if we managed to create more room. If we can't create more
 * room and the transaction must be restarted, we return 1.
 */
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
	if (!ext4_handle_valid(handle))
		return 0;
	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
		return 0;
	if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
		return 0;
	return 1;
}
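
/*
 * Sketch of the caller pattern (see ext4_clear_blocks() below for the
 * real thing, including error handling): when this returns 1, the
 * caller flushes its dirty state, restarts the transaction and
 * re-acquires write access before touching the buffer again:
 *
 *	if (try_to_extend_transaction(handle, inode)) {
 *		ext4_handle_dirty_metadata(handle, inode, bh);
 *		ext4_mark_inode_dirty(handle, inode);
 *		ext4_truncate_restart_trans(handle, inode,
 *				ext4_blocks_for_truncate(inode));
 *		ext4_journal_get_write_access(handle, bh);
 *	}
 */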

/*
 * Probably it should be a library function... search for the first non-zero
 * word or memcmp with zero_page, whatever is better for a particular
 * architecture. Linus?
 */
static inline int all_zeroes(__le32 *p, __le32 *q)
{
	while (p < q)
		if (*p++)
			return 0;
	return 1;
}

/**
 * ext4_find_shared - find the indirect blocks for partial truncation.
 * @inode: inode in question
 * @depth: depth of the affected branch
 * @offsets: offsets of pointers in that branch (see ext4_block_to_path)
 * @chain: place to store the pointers to partial indirect blocks
 * @top: place to store the (detached) top of the branch
 *
 * This is a helper function used by ext4_truncate().
 *
 * When we do truncate() we may have to clean the ends of several
 * indirect blocks but leave the blocks themselves alive. A block is
 * partially truncated if some data below the new i_size is referenced
 * from it (and it is on the path to the first completely truncated
 * data block, indeed). We have to free the top of that path along
 * with everything to the right of the path. Since no allocation
 * past the truncation point is possible until ext4_truncate()
 * finishes, we may safely do the latter, but the top of the branch may
 * require special attention - pageout below the truncation point
 * might try to populate it.
 *
 * We atomically detach the top of the branch from the tree, store the
 * block number of its root in *@top, pointers to buffer_heads of
 * partially truncated blocks - in @chain[].bh and pointers to
 * their last elements that should not be removed - in
 * @chain[].p. The return value is the pointer to the last filled
 * element of @chain.
 *
 * The work left to the caller is the actual freeing of subtrees:
 *	a) free the subtree starting from *@top
 *	b) free the subtrees whose roots are stored in
 *	   (@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *	c) free the subtrees growing from the inode past the @chain[0].
 *	   (no partially truncated stuff there).
 */
static Indirect *ext4_find_shared(struct inode *inode, int depth,
				  ext4_lblk_t offsets[4], Indirect chain[4],
				  __le32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	/* Make k index the deepest non-null offset + 1 */
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = ext4_get_branch(inode, k, offsets, chain, &err);
	/* Writer: pointers */
	if (!partial)
		partial = chain + k-1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	if (!partial->key && *partial->p)
		/* Writer: end */
		goto no_top;
	for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of the branch is all ours and does not grow immediately from the
	 * inode it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		/* Nope, don't do this in ext4. Must leave the tree intact */
#if 0
		*p->p = 0;
#endif
	}
	/* Writer: end */

	while (partial > p) {
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}
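
/*
 * Concrete illustration (hypothetical layout): suppose truncate()
 * leaves a last block whose path is {EXT4_IND_BLOCK, 5}, i.e. the new
 * EOF falls at entry 5 of the single indirect block.  That indirect
 * block is shared: entries 0..5 still map live data and must survive,
 * while entries 6 onwards - and every subtree to the right of the
 * path - must go.  ext4_find_shared() then returns @chain filled down
 * to that indirect block, with chain[].p marking the last pointer to
 * keep at each level; had the whole branch turned out to be freeable,
 * its root would instead have been detached into *@top.
 */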

/*
 * Zero a number of block pointers in either an inode or an indirect block.
 * If we restart the transaction we must again get write access to the
 * indirect block for further modification.
 *
 * We release `count' blocks on disk, but (last - first) may be greater
 * than `count' because there can be holes in there.
 *
 * Return 0 on success, 1 on invalid block range
 * and < 0 on fatal error.
 */
static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
			     struct buffer_head *bh,
			     ext4_fsblk_t block_to_free,
			     unsigned long count, __le32 *first,
			     __le32 *last)
{
	__le32 *p;
	int flags = EXT4_FREE_BLOCKS_VALIDATED;
	int err;

	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
	else if (ext4_should_journal_data(inode))
		flags |= EXT4_FREE_BLOCKS_FORGET;

	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
				   count)) {
		EXT4_ERROR_INODE(inode, "attempt to clear invalid "
				 "blocks %llu len %lu",
				 (unsigned long long) block_to_free, count);
		return 1;
	}

	if (try_to_extend_transaction(handle, inode)) {
		if (bh) {
			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
			err = ext4_handle_dirty_metadata(handle, inode, bh);
			if (unlikely(err))
				goto out_err;
		}
		err = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(err))
			goto out_err;
		err = ext4_truncate_restart_trans(handle, inode,
					ext4_blocks_for_truncate(inode));
		if (unlikely(err))
			goto out_err;
		if (bh) {
			BUFFER_TRACE(bh, "retaking write access");
			err = ext4_journal_get_write_access(handle, bh);
			if (unlikely(err))
				goto out_err;
		}
	}

	for (p = first; p < last; p++)
		*p = 0;

	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
	return 0;
out_err:
	ext4_std_error(inode->i_sb, err);
	return err;
}

/**
 * ext4_free_data - free a list of data blocks
 * @handle: handle for this transaction
 * @inode: inode we are dealing with
 * @this_bh: indirect buffer_head which contains *@first and *@last
 * @first: array of block numbers
 * @last: points immediately past the end of array
 *
 * We are freeing all blocks referenced from that array (numbers are stored
 * as little-endian 32-bit) and updating @inode->i_blocks appropriately.
 *
 * We accumulate contiguous runs of blocks to free.  Conveniently, if these
 * blocks are contiguous then releasing them at one time will only affect one
 * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
 * actually use a lot of journal space.
 *
 * @this_bh will be %NULL if @first and @last point into the inode's direct
 * block pointers.
 */
static void ext4_free_data(handle_t *handle, struct inode *inode,
			   struct buffer_head *this_bh,
			   __le32 *first, __le32 *last)
{
	ext4_fsblk_t block_to_free = 0;		/* Starting block # of a run */
	unsigned long count = 0;		/* Number of blocks in the run */
	__le32 *block_to_free_p = NULL;		/* Pointer into inode/ind
						   corresponding to
						   block_to_free */
	ext4_fsblk_t nr;			/* Current block # */
	__le32 *p;				/* Pointer into inode/ind
						   for current block */
	int err = 0;

	if (this_bh) {				/* For indirect block */
		BUFFER_TRACE(this_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, this_bh);
		/* Important: if we can't update the indirect pointers
		 * to the blocks, we can't free them. */
		if (err)
			return;
	}

	for (p = first; p < last; p++) {
		nr = le32_to_cpu(*p);
		if (nr) {
			/* accumulate blocks to free if they're contiguous */
			if (count == 0) {
				block_to_free = nr;
				block_to_free_p = p;
				count = 1;
			} else if (nr == block_to_free + count) {
				count++;
			} else {
				err = ext4_clear_blocks(handle, inode, this_bh,
							block_to_free, count,
							block_to_free_p, p);
				if (err)
					break;
				block_to_free = nr;
				block_to_free_p = p;
				count = 1;
			}
		}
	}

	if (!err && count > 0)
		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
					count, block_to_free_p, p);
	if (err < 0)
		/* fatal error */
		return;

	if (this_bh) {
		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");

		/*
		 * The buffer head should have an attached journal head at this
		 * point. However, if the data is corrupted and an indirect
		 * block pointed to itself, it would have been detached when
		 * the block was cleared. Check for this instead of OOPSing.
		 */
		if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
			ext4_handle_dirty_metadata(handle, inode, this_bh);
		else
			EXT4_ERROR_INODE(inode,
					 "circular indirect block detected at "
					 "block %llu",
				(unsigned long long) this_bh->b_blocknr);
	}
}
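
/*
 * Worked example (hypothetical array contents { 100, 101, 102, 200,
 * 0, 201 }): the loop above accumulates the run starting at block
 * 100; on seeing 200 it releases (start 100, count 3) and begins a
 * new run; the zero entry is a file hole and is skipped; 201 extends
 * the run because it is contiguous on disk with 200.  The final call
 * after the loop releases (start 200, count 2), where (last - first)
 * exceeds count - exactly the hole case ext4_clear_blocks()
 * documents.
 */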

/**
 * ext4_free_branches - free an array of branches
 * @handle: JBD handle for this transaction
 * @inode: inode we are dealing with
 * @parent_bh: the buffer_head which contains *@first and *@last
 * @first: array of block numbers
 * @last: pointer immediately past the end of array
 * @depth: depth of the branches to free
 *
 * We are freeing all blocks referenced from these branches (numbers are
 * stored as little-endian 32-bit) and updating @inode->i_blocks
 * appropriately.
 */
static void ext4_free_branches(handle_t *handle, struct inode *inode,
			       struct buffer_head *parent_bh,
			       __le32 *first, __le32 *last, int depth)
{
	ext4_fsblk_t nr;
	__le32 *p;

	if (ext4_handle_is_aborted(handle))
		return;

	if (depth--) {
		struct buffer_head *bh;
		int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
		p = last;
		while (--p >= first) {
			nr = le32_to_cpu(*p);
			if (!nr)
				continue;		/* A hole */

			if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
						   nr, 1)) {
				EXT4_ERROR_INODE(inode,
						 "invalid indirect mapped "
						 "block %lu (level %d)",
						 (unsigned long) nr, depth);
				break;
			}

			/* Go read the buffer for the next level down */
			bh = sb_bread(inode->i_sb, nr);

			/*
			 * A read failure? Report error and clear slot
			 * (should be rare).
			 */
			if (!bh) {
				EXT4_ERROR_INODE_BLOCK(inode, nr,
						       "Read failure");
				continue;
			}

			/* This zaps the entire block. Bottom up. */
			BUFFER_TRACE(bh, "free child branches");
			ext4_free_branches(handle, inode, bh,
					(__le32 *) bh->b_data,
					(__le32 *) bh->b_data + addr_per_block,
					depth);
			brelse(bh);

			/*
			 * Everything below this pointer has been
			 * released.  Now let this top-of-subtree go.
			 *
			 * We want the freeing of this indirect block to be
			 * atomic in the journal with the updating of the
			 * bitmap block which owns it.  So make some room in
			 * the journal.
			 *
			 * We zero the parent pointer *after* freeing its
			 * pointee in the bitmaps, so if extend_transaction()
			 * for some reason fails to put the bitmap changes and
			 * the release into the same transaction, recovery
			 * will merely complain about releasing a free block,
			 * rather than leaking blocks.
			 */
			if (ext4_handle_is_aborted(handle))
				return;
			if (try_to_extend_transaction(handle, inode)) {
				ext4_mark_inode_dirty(handle, inode);
				ext4_truncate_restart_trans(handle, inode,
					    ext4_blocks_for_truncate(inode));
			}

			/*
			 * The forget flag here is critical because if
			 * we are journaling (and not doing data
			 * journaling), we have to make sure a revoke
			 * record is written to prevent the journal
			 * replay from overwriting the (former)
			 * indirect block if it gets reallocated as a
			 * data block.  This must happen in the same
			 * transaction where the data blocks are
			 * actually freed.
			 */
			ext4_free_blocks(handle, inode, NULL, nr, 1,
					 EXT4_FREE_BLOCKS_METADATA|
					 EXT4_FREE_BLOCKS_FORGET);

			if (parent_bh) {
				/*
				 * The block which we have just freed is
				 * pointed to by an indirect block: journal it
				 */
				BUFFER_TRACE(parent_bh, "get_write_access");
				if (!ext4_journal_get_write_access(handle,
								   parent_bh)){
					*p = 0;
					BUFFER_TRACE(parent_bh,
						"call ext4_handle_dirty_metadata");
					ext4_handle_dirty_metadata(handle,
								   inode,
								   parent_bh);
				}
			}
		}
	} else {
		/* We have reached the bottom of the tree. */
		BUFFER_TRACE(parent_bh, "free data blocks");
		ext4_free_data(handle, inode, parent_bh, first, last);
	}
}
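
/*
 * Shape of the recursion (illustrative): called with depth == 2 on a
 * double-indirect block, ext4_free_branches() walks the pointers from
 * the end, recursing with depth == 1 into each child indirect block;
 * at depth == 0 the else-branch hands the direct block numbers to
 * ext4_free_data().  Indirect blocks are freed bottom-up with revoke
 * records, and the parent slot is zeroed only afterwards, so a crash
 * in between costs at most a recovery-time complaint about releasing
 * an already-free block rather than a leak.
 */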

void ext4_ind_truncate(handle_t *handle, struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	__le32 *i_data = ei->i_data;
	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
	ext4_lblk_t offsets[4];
	Indirect chain[4];
	Indirect *partial;
	__le32 nr = 0;
	int n = 0;
	ext4_lblk_t last_block, max_block;
	unsigned blocksize = inode->i_sb->s_blocksize;

	last_block = (inode->i_size + blocksize-1)
					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);

	if (last_block != max_block) {
		n = ext4_block_to_path(inode, last_block, offsets, NULL);
		if (n == 0)
			return;
	}

	ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);

	/*
	 * The orphan list entry will now protect us from any crash which
	 * occurs before the truncate completes, so it is now safe to propagate
	 * the new, shorter inode size (held for now in i_size) into the
	 * on-disk inode. We do this via i_disksize, which is the value which
	 * ext4 *really* writes onto the disk inode.
	 */
	ei->i_disksize = inode->i_size;

	if (last_block == max_block) {
		/*
		 * It is unnecessary to free any data blocks if last_block is
		 * equal to the indirect block limit.
		 */
		return;
	} else if (n == 1) {		/* direct blocks */
		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
			       i_data + EXT4_NDIR_BLOCKS);
		goto do_indirects;
	}

	partial = ext4_find_shared(inode, n, offsets, chain, &nr);
	/* Kill the top of shared branch (not detached) */
	if (nr) {
		if (partial == chain) {
			/* Shared branch grows from the inode */
			ext4_free_branches(handle, inode, NULL,
					   &nr, &nr+1, (chain+n-1) - partial);
			*partial->p = 0;
			/*
			 * We mark the inode dirty prior to restart,
			 * and prior to stop. No need for it here.
			 */
		} else {
			/* Shared branch grows from an indirect block */
			BUFFER_TRACE(partial->bh, "get_write_access");
			ext4_free_branches(handle, inode, partial->bh,
					   partial->p,
					   partial->p+1, (chain+n-1) - partial);
		}
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
				   (__le32*)partial->bh->b_data+addr_per_block,
				   (chain+n-1) - partial);
		BUFFER_TRACE(partial->bh, "call brelse");
		brelse(partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees */
	switch (offsets[0]) {
	default:
		nr = i_data[EXT4_IND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
			i_data[EXT4_IND_BLOCK] = 0;
		}
		/* fall through */
	case EXT4_IND_BLOCK:
		nr = i_data[EXT4_DIND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
			i_data[EXT4_DIND_BLOCK] = 0;
		}
		/* fall through */
	case EXT4_DIND_BLOCK:
		nr = i_data[EXT4_TIND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
			i_data[EXT4_TIND_BLOCK] = 0;
		}
		/* fall through */
	case EXT4_TIND_BLOCK:
		;
	}
}
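
/*
 * Example of the do_indirects fall-through above (illustrative): a
 * last_block in the double-indirect range yields
 * offsets[0] == EXT4_DIND_BLOCK, so the switch enters at that case
 * and frees only the triple-indirect subtree; a last_block in the
 * direct range enters at "default" and sweeps the indirect,
 * double-indirect and triple-indirect subtrees in turn.
 */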
1303 | 1303 | ||
1304 | /** | 1304 | /** |
1305 | * ext4_ind_remove_space - remove space from the range | 1305 | * ext4_ind_remove_space - remove space from the range |
1306 | * @handle: JBD handle for this transaction | 1306 | * @handle: JBD handle for this transaction |
1307 | * @inode: inode we are dealing with | 1307 | * @inode: inode we are dealing with |
1308 | * @start: First block to remove | 1308 | * @start: First block to remove |
1309 | * @end: One block after the last block to remove (exclusive) | 1309 | * @end: One block after the last block to remove (exclusive) |
1310 | * | 1310 | * |
1311 | * Free the blocks in the defined range (end is the exclusive endpoint | 1311 | * Free the blocks in the defined range (end is the exclusive endpoint |
1312 | * of the range). This is used by ext4_punch_hole(). | 1312 | * of the range). This is used by ext4_punch_hole(). |
1313 | */ | 1313 | */ |
1314 | int ext4_ind_remove_space(handle_t *handle, struct inode *inode, | 1314 | int ext4_ind_remove_space(handle_t *handle, struct inode *inode, |
1315 | ext4_lblk_t start, ext4_lblk_t end) | 1315 | ext4_lblk_t start, ext4_lblk_t end) |
1316 | { | 1316 | { |
1317 | struct ext4_inode_info *ei = EXT4_I(inode); | 1317 | struct ext4_inode_info *ei = EXT4_I(inode); |
1318 | __le32 *i_data = ei->i_data; | 1318 | __le32 *i_data = ei->i_data; |
1319 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 1319 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
1320 | ext4_lblk_t offsets[4], offsets2[4]; | 1320 | ext4_lblk_t offsets[4], offsets2[4]; |
1321 | Indirect chain[4], chain2[4]; | 1321 | Indirect chain[4], chain2[4]; |
1322 | Indirect *partial, *partial2; | 1322 | Indirect *partial, *partial2; |
1323 | ext4_lblk_t max_block; | 1323 | ext4_lblk_t max_block; |
1324 | __le32 nr = 0, nr2 = 0; | 1324 | __le32 nr = 0, nr2 = 0; |
1325 | int n = 0, n2 = 0; | 1325 | int n = 0, n2 = 0; |
1326 | unsigned blocksize = inode->i_sb->s_blocksize; | 1326 | unsigned blocksize = inode->i_sb->s_blocksize; |
1327 | 1327 | ||
1328 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | 1328 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) |
1329 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 1329 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
1330 | if (end >= max_block) | 1330 | if (end >= max_block) |
1331 | end = max_block; | 1331 | end = max_block; |
1332 | if ((start >= end) || (start > max_block)) | 1332 | if ((start >= end) || (start > max_block)) |
1333 | return 0; | 1333 | return 0; |
1334 | 1334 | ||
1335 | n = ext4_block_to_path(inode, start, offsets, NULL); | 1335 | n = ext4_block_to_path(inode, start, offsets, NULL); |
1336 | n2 = ext4_block_to_path(inode, end, offsets2, NULL); | 1336 | n2 = ext4_block_to_path(inode, end, offsets2, NULL); |
1337 | 1337 | ||
1338 | BUG_ON(n > n2); | 1338 | BUG_ON(n > n2); |
1339 | 1339 | ||
1340 | if ((n == 1) && (n == n2)) { | 1340 | if ((n == 1) && (n == n2)) { |
1341 | /* We're punching only within direct block range */ | 1341 | /* We're punching only within direct block range */ |
1342 | ext4_free_data(handle, inode, NULL, i_data + offsets[0], | 1342 | ext4_free_data(handle, inode, NULL, i_data + offsets[0], |
1343 | i_data + offsets2[0]); | 1343 | i_data + offsets2[0]); |
1344 | return 0; | 1344 | return 0; |
1345 | } else if (n2 > n) { | 1345 | } else if (n2 > n) { |
1346 | /* | 1346 | /* |
1347 | * Start and end are on different levels, so we're going to | 1347 | * Start and end are on different levels, so we're going to |
1348 | * free a partial block at the start and a partial block at the | 1348 | * free a partial block at the start and a partial block at the |
1349 | * end of the range. If there are some levels in between, the | 1349 | * end of the range. If there are some levels in between, the |
1350 | * do_indirects label will take care of those. | 1350 | * do_indirects label will take care of those. |
1351 | */ | 1351 | */ |
1352 | 1352 | ||
1353 | if (n == 1) { | 1353 | if (n == 1) { |
1354 | /* | 1354 | /* |
1355 | * Start is at the direct block level, free | 1355 | * Start is at the direct block level, free |
1356 | * everything to the end of the level. | 1356 | * everything to the end of the level. |
1357 | */ | 1357 | */ |
1358 | ext4_free_data(handle, inode, NULL, i_data + offsets[0], | 1358 | ext4_free_data(handle, inode, NULL, i_data + offsets[0], |
1359 | i_data + EXT4_NDIR_BLOCKS); | 1359 | i_data + EXT4_NDIR_BLOCKS); |
1360 | goto end_range; | 1360 | goto end_range; |
1361 | } | 1361 | } |
1362 | 1362 | ||
1363 | 1363 | ||
1364 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | 1364 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); |
1365 | if (nr) { | 1365 | if (nr) { |
1366 | if (partial == chain) { | 1366 | if (partial == chain) { |
1367 | /* Shared branch grows from the inode */ | 1367 | /* Shared branch grows from the inode */ |
1368 | ext4_free_branches(handle, inode, NULL, | 1368 | ext4_free_branches(handle, inode, NULL, |
1369 | &nr, &nr+1, (chain+n-1) - partial); | 1369 | &nr, &nr+1, (chain+n-1) - partial); |
1370 | *partial->p = 0; | 1370 | *partial->p = 0; |
1371 | } else { | 1371 | } else { |
1372 | /* Shared branch grows from an indirect block */ | 1372 | /* Shared branch grows from an indirect block */ |
1373 | BUFFER_TRACE(partial->bh, "get_write_access"); | 1373 | BUFFER_TRACE(partial->bh, "get_write_access"); |
1374 | ext4_free_branches(handle, inode, partial->bh, | 1374 | ext4_free_branches(handle, inode, partial->bh, |
1375 | partial->p, | 1375 | partial->p, |
1376 | partial->p+1, (chain+n-1) - partial); | 1376 | partial->p+1, (chain+n-1) - partial); |
1377 | } | 1377 | } |
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | /* | 1380 | /* |
1381 | * Clear the ends of indirect blocks on the shared branch | 1381 | * Clear the ends of indirect blocks on the shared branch |
1382 | * at the start of the range | 1382 | * at the start of the range |
1383 | */ | 1383 | */ |
1384 | while (partial > chain) { | 1384 | while (partial > chain) { |
1385 | ext4_free_branches(handle, inode, partial->bh, | 1385 | ext4_free_branches(handle, inode, partial->bh, |
1386 | partial->p + 1, | 1386 | partial->p + 1, |
1387 | (__le32 *)partial->bh->b_data+addr_per_block, | 1387 | (__le32 *)partial->bh->b_data+addr_per_block, |
1388 | (chain+n-1) - partial); | 1388 | (chain+n-1) - partial); |
1389 | BUFFER_TRACE(partial->bh, "call brelse"); | 1389 | BUFFER_TRACE(partial->bh, "call brelse"); |
1390 | brelse(partial->bh); | 1390 | brelse(partial->bh); |
1391 | partial--; | 1391 | partial--; |
1392 | } | 1392 | } |
1393 | 1393 | ||
1394 | end_range: | 1394 | end_range: |
1395 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); | 1395 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); |
1396 | if (nr2) { | 1396 | if (nr2) { |
1397 | if (partial2 == chain2) { | 1397 | if (partial2 == chain2) { |
1398 | /* | 1398 | /* |
1399 | * Remember, end is exclusive, so here we're at | 1399 | * Remember, end is exclusive, so here we're at |
1400 | * the start of the next level, which we're not | 1400 | * the start of the next level, which we're not |
1401 | * going to free. Everything was covered by the | 1401 | * going to free. Everything was covered by the |
1402 | * start of the range. | 1402 | * start of the range. |
1403 | */ | 1403 | */ |
1404 | return 0; | 1404 | goto do_indirects; |
1405 | } else { | ||
1406 | /* Shared branch grows from an indirect block */ | ||
1407 | partial2--; | ||
1408 | } | 1405 | } |
1409 | } else { | 1406 | } else { |
1410 | /* | 1407 | /* |
1411 | * ext4_find_shared returns an Indirect structure that | 1408 | * ext4_find_shared returns an Indirect structure that |
1412 | * points to the last element which should not be | 1409 | * points to the last element which should not be |
1413 | * removed by truncate. But this is the end of the range | 1410 | * removed by truncate. But this is the end of the range |
1414 | * in punch_hole, so we need to point to the next element. | 1411 | * in punch_hole, so we need to point to the next element. |
1415 | */ | 1412 | */ |
1416 | partial2->p++; | 1413 | partial2->p++; |
1417 | } | 1414 | } |
1418 | 1415 | ||
1419 | /* | 1416 | /* |
1420 | * Clear the ends of indirect blocks on the shared branch | 1417 | * Clear the ends of indirect blocks on the shared branch |
1421 | * at the end of the range | 1418 | * at the end of the range |
1422 | */ | 1419 | */ |
1423 | while (partial2 > chain2) { | 1420 | while (partial2 > chain2) { |
1424 | ext4_free_branches(handle, inode, partial2->bh, | 1421 | ext4_free_branches(handle, inode, partial2->bh, |
1425 | (__le32 *)partial2->bh->b_data, | 1422 | (__le32 *)partial2->bh->b_data, |
1426 | partial2->p, | 1423 | partial2->p, |
1427 | (chain2+n2-1) - partial2); | 1424 | (chain2+n2-1) - partial2); |
1428 | BUFFER_TRACE(partial2->bh, "call brelse"); | 1425 | BUFFER_TRACE(partial2->bh, "call brelse"); |
1429 | brelse(partial2->bh); | 1426 | brelse(partial2->bh); |
1430 | partial2--; | 1427 | partial2--; |
1431 | } | 1428 | } |
1432 | goto do_indirects; | 1429 | goto do_indirects; |
1433 | } | 1430 | } |
1434 | 1431 | ||
1435 | /* Punch happened within the same level (n == n2) */ | 1432 | /* Punch happened within the same level (n == n2) */ |
1436 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | 1433 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); |
1437 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); | 1434 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); |
1438 | /* | 1435 | |
1439 | * ext4_find_shared returns Indirect structure which | 1436 | /* Free top, but only if partial2 isn't its subtree. */ |
1440 | * points to the last element which should not be | 1437 | if (nr) { |
1441 | * removed by truncate. But this is end of the range | 1438 | int level = min(partial - chain, partial2 - chain2); |
1442 | * in punch_hole so we need to point to the next element | 1439 | int i; |
1443 | */ | 1440 | int subtree = 1; |
1444 | partial2->p++; | 1441 | |
1445 | while ((partial > chain) || (partial2 > chain2)) { | 1442 | for (i = 0; i <= level; i++) { |
1446 | /* We're at the same block, so we're almost finished */ | 1443 | if (offsets[i] != offsets2[i]) { |
1447 | if ((partial->bh && partial2->bh) && | 1444 | subtree = 0; |
1448 | (partial->bh->b_blocknr == partial2->bh->b_blocknr)) { | 1445 | break; |
1449 | if ((partial > chain) && (partial2 > chain2)) { | 1446 | } |
1447 | } | ||
1448 | |||
1449 | if (!subtree) { | ||
1450 | if (partial == chain) { | ||
1451 | /* Shared branch grows from the inode */ | ||
1452 | ext4_free_branches(handle, inode, NULL, | ||
1453 | &nr, &nr+1, | ||
1454 | (chain+n-1) - partial); | ||
1455 | *partial->p = 0; | ||
1456 | } else { | ||
1457 | /* Shared branch grows from an indirect block */ | ||
1458 | BUFFER_TRACE(partial->bh, "get_write_access"); | ||
1450 | ext4_free_branches(handle, inode, partial->bh, | 1459 | ext4_free_branches(handle, inode, partial->bh, |
1451 | partial->p + 1, | 1460 | partial->p, |
1452 | partial2->p, | 1461 | partial->p+1, |
1453 | (chain+n-1) - partial); | 1462 | (chain+n-1) - partial); |
1454 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1455 | brelse(partial->bh); | ||
1456 | BUFFER_TRACE(partial2->bh, "call brelse"); | ||
1457 | brelse(partial2->bh); | ||
1458 | } | 1463 | } |
1459 | return 0; | ||
1460 | } | 1464 | } |
1465 | } | ||
1466 | |||
1467 | if (!nr2) { | ||
1461 | /* | 1468 | /* |
1462 | * Clear the ends of indirect blocks on the shared branch | 1469 | * ext4_find_shared returns an Indirect structure that |
1463 | * at the start of the range | 1470 | * points to the last element which should not be |
1471 | * removed by truncate. But this is the end of the range | ||
1472 | * in punch_hole, so we need to point to the next element. | ||
1464 | */ | 1473 | */ |
1465 | if (partial > chain) { | 1474 | partial2->p++; |
1475 | } | ||
1476 | |||
1477 | while (partial > chain || partial2 > chain2) { | ||
1478 | int depth = (chain+n-1) - partial; | ||
1479 | int depth2 = (chain2+n2-1) - partial2; | ||
1480 | |||
1481 | if (partial > chain && partial2 > chain2 && | ||
1482 | partial->bh->b_blocknr == partial2->bh->b_blocknr) { | ||
1483 | /* | ||
1484 | * We've converged on the same block. Clear the range, | ||
1485 | * then we're done. | ||
1486 | */ | ||
1466 | ext4_free_branches(handle, inode, partial->bh, | 1487 | ext4_free_branches(handle, inode, partial->bh, |
1467 | partial->p + 1, | 1488 | partial->p + 1, |
1468 | (__le32 *)partial->bh->b_data+addr_per_block, | 1489 | partial2->p, |
1469 | (chain+n-1) - partial); | 1490 | (chain+n-1) - partial); |
1470 | BUFFER_TRACE(partial->bh, "call brelse"); | 1491 | BUFFER_TRACE(partial->bh, "call brelse"); |
1471 | brelse(partial->bh); | 1492 | brelse(partial->bh); |
1472 | partial--; | 1493 | BUFFER_TRACE(partial2->bh, "call brelse"); |
1494 | brelse(partial2->bh); | ||
1495 | return 0; | ||
1473 | } | 1496 | } |
1497 | |||
1474 | /* | 1498 | /* |
1475 | * Clear the ends of indirect blocks on the shared branch | 1499 | * The start and end partial branches may not be at the same |
1476 | * at the end of the range | 1500 | * level even though the punch happened within one level. So, we |
1501 | * give them a chance to arrive at the same level, then walk | ||
1502 | * them in step with each other until we converge on the same | ||
1503 | * block. | ||
1477 | */ | 1504 | */ |
1478 | if (partial2 > chain2) { | 1505 | if (partial > chain && depth <= depth2) { |
1506 | ext4_free_branches(handle, inode, partial->bh, | ||
1507 | partial->p + 1, | ||
1508 | (__le32 *)partial->bh->b_data+addr_per_block, | ||
1509 | (chain+n-1) - partial); | ||
1510 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1511 | brelse(partial->bh); | ||
1512 | partial--; | ||
1513 | } | ||
1514 | if (partial2 > chain2 && depth2 <= depth) { | ||
1479 | ext4_free_branches(handle, inode, partial2->bh, | 1515 | ext4_free_branches(handle, inode, partial2->bh, |
1480 | (__le32 *)partial2->bh->b_data, | 1516 | (__le32 *)partial2->bh->b_data, |
1481 | partial2->p, | 1517 | partial2->p, |
1482 | (chain2+n-1) - partial2); | 1518 | (chain2+n2-1) - partial2); |
1483 | BUFFER_TRACE(partial2->bh, "call brelse"); | 1519 | BUFFER_TRACE(partial2->bh, "call brelse"); |
1484 | brelse(partial2->bh); | 1520 | brelse(partial2->bh); |
1485 | partial2--; | 1521 | partial2--; |
1486 | } | 1522 | } |
1487 | } | 1523 | } |
1524 | return 0; | ||
1488 | 1525 | ||
1489 | do_indirects: | 1526 | do_indirects: |
1490 | /* Kill the remaining (whole) subtrees */ | 1527 | /* Kill the remaining (whole) subtrees */ |
1491 | switch (offsets[0]) { | 1528 | switch (offsets[0]) { |
1492 | default: | 1529 | default: |
1493 | if (++n >= n2) | 1530 | if (++n >= n2) |
1494 | return 0; | 1531 | return 0; |
1495 | nr = i_data[EXT4_IND_BLOCK]; | 1532 | nr = i_data[EXT4_IND_BLOCK]; |
1496 | if (nr) { | 1533 | if (nr) { |
1497 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | 1534 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); |
1498 | i_data[EXT4_IND_BLOCK] = 0; | 1535 | i_data[EXT4_IND_BLOCK] = 0; |
1499 | } | 1536 | } |
1500 | case EXT4_IND_BLOCK: | 1537 | case EXT4_IND_BLOCK: |
1501 | if (++n >= n2) | 1538 | if (++n >= n2) |
1502 | return 0; | 1539 | return 0; |
1503 | nr = i_data[EXT4_DIND_BLOCK]; | 1540 | nr = i_data[EXT4_DIND_BLOCK]; |
1504 | if (nr) { | 1541 | if (nr) { |
1505 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | 1542 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); |
1506 | i_data[EXT4_DIND_BLOCK] = 0; | 1543 | i_data[EXT4_DIND_BLOCK] = 0; |
1507 | } | 1544 | } |
1508 | case EXT4_DIND_BLOCK: | 1545 | case EXT4_DIND_BLOCK: |
1509 | if (++n >= n2) | 1546 | if (++n >= n2) |
1510 | return 0; | 1547 | return 0; |
1511 | nr = i_data[EXT4_TIND_BLOCK]; | 1548 | nr = i_data[EXT4_TIND_BLOCK]; |
1512 | if (nr) { | 1549 | if (nr) { |
1513 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | 1550 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); |
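The heart of the punch-hole corruption fix is the new lock-step walk above: instead of unconditionally clearing from partial->p + 1 to the end of every level on the start side (which could run past the end cursor and free live blocks), each cursor computes its distance from the deepest level and only a cursor that is at least as deep as the other retreats, so both arrive at the shared level together. Below is a stand-alone, deliberately simplified model of that rule, with plain integers standing in for the Indirect chains; the real code additionally compares b_blocknr before declaring convergence and frees blocks at each step.

#include <stdio.h>

/* Hedged model of the converging walk (hypothetical: depths are plain
 * integers counted from the deepest indirect level; reaching the same
 * depth stands in for landing on the same indirect block). */
int main(void)
{
	int max_depth = 3;	/* n - 1: levels in each chain     */
	int ds = 0, de = 2;	/* start and end cursor depths     */

	while (ds < max_depth || de < max_depth) {
		int d1 = ds, d2 = de;	/* snapshots, like depth/depth2 */

		if (d1 == d2) {
			printf("converged at depth %d: clear between "
			       "the cursors and stop\n", d1);
			break;
		}
		if (ds < max_depth && d1 <= d2)
			ds++;	/* free tail of the start level, go up */
		if (de < max_depth && d2 <= d1)
			de++;	/* free head of the end level, go up   */
	}
	return 0;
}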
fs/ext4/inode.c
1 | /* | 1 | /* |
2 | * linux/fs/ext4/inode.c | 2 | * linux/fs/ext4/inode.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1993, 1994, 1995 | 4 | * Copyright (C) 1992, 1993, 1994, 1995 |
5 | * Remy Card (card@masi.ibp.fr) | 5 | * Remy Card (card@masi.ibp.fr) |
6 | * Laboratoire MASI - Institut Blaise Pascal | 6 | * Laboratoire MASI - Institut Blaise Pascal |
7 | * Universite Pierre et Marie Curie (Paris VI) | 7 | * Universite Pierre et Marie Curie (Paris VI) |
8 | * | 8 | * |
9 | * from | 9 | * from |
10 | * | 10 | * |
11 | * linux/fs/minix/inode.c | 11 | * linux/fs/minix/inode.c |
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | 15 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
16 | * (jj@sunsite.ms.mff.cuni.cz) | 16 | * (jj@sunsite.ms.mff.cuni.cz) |
17 | * | 17 | * |
18 | * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 | 18 | * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 |
19 | */ | 19 | */ |
20 | 20 | ||
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/jbd2.h> | 23 | #include <linux/jbd2.h> |
24 | #include <linux/highuid.h> | 24 | #include <linux/highuid.h> |
25 | #include <linux/pagemap.h> | 25 | #include <linux/pagemap.h> |
26 | #include <linux/quotaops.h> | 26 | #include <linux/quotaops.h> |
27 | #include <linux/string.h> | 27 | #include <linux/string.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
30 | #include <linux/pagevec.h> | 30 | #include <linux/pagevec.h> |
31 | #include <linux/mpage.h> | 31 | #include <linux/mpage.h> |
32 | #include <linux/namei.h> | 32 | #include <linux/namei.h> |
33 | #include <linux/uio.h> | 33 | #include <linux/uio.h> |
34 | #include <linux/bio.h> | 34 | #include <linux/bio.h> |
35 | #include <linux/workqueue.h> | 35 | #include <linux/workqueue.h> |
36 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
37 | #include <linux/printk.h> | 37 | #include <linux/printk.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
40 | #include <linux/aio.h> | 40 | #include <linux/aio.h> |
41 | #include <linux/bitops.h> | 41 | #include <linux/bitops.h> |
42 | 42 | ||
43 | #include "ext4_jbd2.h" | 43 | #include "ext4_jbd2.h" |
44 | #include "xattr.h" | 44 | #include "xattr.h" |
45 | #include "acl.h" | 45 | #include "acl.h" |
46 | #include "truncate.h" | 46 | #include "truncate.h" |
47 | 47 | ||
48 | #include <trace/events/ext4.h> | 48 | #include <trace/events/ext4.h> |
49 | 49 | ||
50 | #define MPAGE_DA_EXTENT_TAIL 0x01 | 50 | #define MPAGE_DA_EXTENT_TAIL 0x01 |
51 | 51 | ||
52 | static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, | 52 | static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, |
53 | struct ext4_inode_info *ei) | 53 | struct ext4_inode_info *ei) |
54 | { | 54 | { |
55 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 55 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
56 | __u16 csum_lo; | 56 | __u16 csum_lo; |
57 | __u16 csum_hi = 0; | 57 | __u16 csum_hi = 0; |
58 | __u32 csum; | 58 | __u32 csum; |
59 | 59 | ||
60 | csum_lo = le16_to_cpu(raw->i_checksum_lo); | 60 | csum_lo = le16_to_cpu(raw->i_checksum_lo); |
61 | raw->i_checksum_lo = 0; | 61 | raw->i_checksum_lo = 0; |
62 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | 62 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && |
63 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { | 63 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { |
64 | csum_hi = le16_to_cpu(raw->i_checksum_hi); | 64 | csum_hi = le16_to_cpu(raw->i_checksum_hi); |
65 | raw->i_checksum_hi = 0; | 65 | raw->i_checksum_hi = 0; |
66 | } | 66 | } |
67 | 67 | ||
68 | csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, | 68 | csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, |
69 | EXT4_INODE_SIZE(inode->i_sb)); | 69 | EXT4_INODE_SIZE(inode->i_sb)); |
70 | 70 | ||
71 | raw->i_checksum_lo = cpu_to_le16(csum_lo); | 71 | raw->i_checksum_lo = cpu_to_le16(csum_lo); |
72 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | 72 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && |
73 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) | 73 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) |
74 | raw->i_checksum_hi = cpu_to_le16(csum_hi); | 74 | raw->i_checksum_hi = cpu_to_le16(csum_hi); |
75 | 75 | ||
76 | return csum; | 76 | return csum; |
77 | } | 77 | } |
78 | 78 | ||
79 | static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, | 79 | static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, |
80 | struct ext4_inode_info *ei) | 80 | struct ext4_inode_info *ei) |
81 | { | 81 | { |
82 | __u32 provided, calculated; | 82 | __u32 provided, calculated; |
83 | 83 | ||
84 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 84 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
85 | cpu_to_le32(EXT4_OS_LINUX) || | 85 | cpu_to_le32(EXT4_OS_LINUX) || |
86 | !ext4_has_metadata_csum(inode->i_sb)) | 86 | !ext4_has_metadata_csum(inode->i_sb)) |
87 | return 1; | 87 | return 1; |
88 | 88 | ||
89 | provided = le16_to_cpu(raw->i_checksum_lo); | 89 | provided = le16_to_cpu(raw->i_checksum_lo); |
90 | calculated = ext4_inode_csum(inode, raw, ei); | 90 | calculated = ext4_inode_csum(inode, raw, ei); |
91 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | 91 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && |
92 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) | 92 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) |
93 | provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; | 93 | provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; |
94 | else | 94 | else |
95 | calculated &= 0xFFFF; | 95 | calculated &= 0xFFFF; |
96 | 96 | ||
97 | return provided == calculated; | 97 | return provided == calculated; |
98 | } | 98 | } |
99 | 99 | ||
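The verify helper above shows why the checksum handling is split: the crc32c result is 32 bits, but only inodes larger than the good-old 128-byte format have room for the high half (i_checksum_hi), so small inodes compare just the low 16 bits, which is why `calculated` is masked. A hedged, stand-alone worked example of the split and recombination (the checksum value is invented for illustration):

#include <stdint.h>
#include <assert.h>

/* Worked example of how the 32-bit inode checksum is split across
 * i_checksum_lo / i_checksum_hi and recombined on verify. */
int main(void)
{
	uint32_t csum = 0x8899AABBu;	/* full crc32c result (made up) */
	uint16_t lo = csum & 0xFFFF;	/* stored in raw->i_checksum_lo */
	uint16_t hi = csum >> 16;	/* stored in raw->i_checksum_hi */

	/* Large inode: both halves stored, recombine for the compare. */
	uint32_t provided = lo | ((uint32_t)hi << 16);
	assert(provided == csum);

	/* 128-byte inode: no hi field, so mask the calculated value. */
	assert((csum & 0xFFFF) == lo);
	return 0;
}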
100 | static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, | 100 | static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, |
101 | struct ext4_inode_info *ei) | 101 | struct ext4_inode_info *ei) |
102 | { | 102 | { |
103 | __u32 csum; | 103 | __u32 csum; |
104 | 104 | ||
105 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 105 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
106 | cpu_to_le32(EXT4_OS_LINUX) || | 106 | cpu_to_le32(EXT4_OS_LINUX) || |
107 | !ext4_has_metadata_csum(inode->i_sb)) | 107 | !ext4_has_metadata_csum(inode->i_sb)) |
108 | return; | 108 | return; |
109 | 109 | ||
110 | csum = ext4_inode_csum(inode, raw, ei); | 110 | csum = ext4_inode_csum(inode, raw, ei); |
111 | raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); | 111 | raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); |
112 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | 112 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && |
113 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) | 113 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) |
114 | raw->i_checksum_hi = cpu_to_le16(csum >> 16); | 114 | raw->i_checksum_hi = cpu_to_le16(csum >> 16); |
115 | } | 115 | } |
116 | 116 | ||
117 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 117 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
118 | loff_t new_size) | 118 | loff_t new_size) |
119 | { | 119 | { |
120 | trace_ext4_begin_ordered_truncate(inode, new_size); | 120 | trace_ext4_begin_ordered_truncate(inode, new_size); |
121 | /* | 121 | /* |
122 | * If jinode is zero, then we never opened the file for | 122 | * If jinode is zero, then we never opened the file for |
123 | * writing, so there's no need to call | 123 | * writing, so there's no need to call |
124 | * jbd2_journal_begin_ordered_truncate() since there's no | 124 | * jbd2_journal_begin_ordered_truncate() since there's no |
125 | * outstanding writes we need to flush. | 125 | * outstanding writes we need to flush. |
126 | */ | 126 | */ |
127 | if (!EXT4_I(inode)->jinode) | 127 | if (!EXT4_I(inode)->jinode) |
128 | return 0; | 128 | return 0; |
129 | return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), | 129 | return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), |
130 | EXT4_I(inode)->jinode, | 130 | EXT4_I(inode)->jinode, |
131 | new_size); | 131 | new_size); |
132 | } | 132 | } |
133 | 133 | ||
134 | static void ext4_invalidatepage(struct page *page, unsigned int offset, | 134 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
135 | unsigned int length); | 135 | unsigned int length); |
136 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 136 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
137 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 137 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
138 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, | 138 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
139 | int pextents); | 139 | int pextents); |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * Test whether an inode is a fast symlink. | 142 | * Test whether an inode is a fast symlink. |
143 | */ | 143 | */ |
144 | static int ext4_inode_is_fast_symlink(struct inode *inode) | 144 | static int ext4_inode_is_fast_symlink(struct inode *inode) |
145 | { | 145 | { |
146 | int ea_blocks = EXT4_I(inode)->i_file_acl ? | 146 | int ea_blocks = EXT4_I(inode)->i_file_acl ? |
147 | EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; | 147 | EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; |
148 | 148 | ||
149 | if (ext4_has_inline_data(inode)) | 149 | if (ext4_has_inline_data(inode)) |
150 | return 0; | 150 | return 0; |
151 | 151 | ||
152 | return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); | 152 | return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); |
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Restart the transaction associated with *handle. This does a commit, | 156 | * Restart the transaction associated with *handle. This does a commit, |
157 | * so before we call here everything must be consistently dirtied against | 157 | * so before we call here everything must be consistently dirtied against |
158 | * this transaction. | 158 | * this transaction. |
159 | */ | 159 | */ |
160 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | 160 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, |
161 | int nblocks) | 161 | int nblocks) |
162 | { | 162 | { |
163 | int ret; | 163 | int ret; |
164 | 164 | ||
165 | /* | 165 | /* |
166 | * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this | 166 | * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this |
167 | * moment, get_block can be called only for blocks inside i_size since | 167 | * moment, get_block can be called only for blocks inside i_size since |
168 | * page cache has been already dropped and writes are blocked by | 168 | * page cache has been already dropped and writes are blocked by |
169 | * i_mutex. So we can safely drop the i_data_sem here. | 169 | * i_mutex. So we can safely drop the i_data_sem here. |
170 | */ | 170 | */ |
171 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 171 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
172 | jbd_debug(2, "restarting handle %p\n", handle); | 172 | jbd_debug(2, "restarting handle %p\n", handle); |
173 | up_write(&EXT4_I(inode)->i_data_sem); | 173 | up_write(&EXT4_I(inode)->i_data_sem); |
174 | ret = ext4_journal_restart(handle, nblocks); | 174 | ret = ext4_journal_restart(handle, nblocks); |
175 | down_write(&EXT4_I(inode)->i_data_sem); | 175 | down_write(&EXT4_I(inode)->i_data_sem); |
176 | ext4_discard_preallocations(inode); | 176 | ext4_discard_preallocations(inode); |
177 | 177 | ||
178 | return ret; | 178 | return ret; |
179 | } | 179 | } |
180 | 180 | ||
181 | /* | 181 | /* |
182 | * Called at the last iput() if i_nlink is zero. | 182 | * Called at the last iput() if i_nlink is zero. |
183 | */ | 183 | */ |
184 | void ext4_evict_inode(struct inode *inode) | 184 | void ext4_evict_inode(struct inode *inode) |
185 | { | 185 | { |
186 | handle_t *handle; | 186 | handle_t *handle; |
187 | int err; | 187 | int err; |
188 | 188 | ||
189 | trace_ext4_evict_inode(inode); | 189 | trace_ext4_evict_inode(inode); |
190 | 190 | ||
191 | if (inode->i_nlink) { | 191 | if (inode->i_nlink) { |
192 | /* | 192 | /* |
192 | * When journalling data, dirty buffers are tracked only in the | 192 | * When journalling data, dirty buffers are tracked only in the |
193 | * journal. So although the mm thinks everything is clean and | 193 | * journal. So although the mm thinks everything is clean and |
194 | * ready for reaping, the inode might still have some pages to | 194 | * ready for reaping, the inode might still have some pages to |
196 | * write in the running transaction or waiting to be | 196 | * write in the running transaction or waiting to be |
197 | * checkpointed. Thus calling jbd2_journal_invalidatepage() | 197 | * checkpointed. Thus calling jbd2_journal_invalidatepage() |
198 | * (via truncate_inode_pages()) to discard these buffers can | 198 | * (via truncate_inode_pages()) to discard these buffers can |
199 | * cause data loss. Also even if we did not discard these | 199 | * cause data loss. Also even if we did not discard these |
200 | * buffers, we would have no way to find them after the inode | 200 | * buffers, we would have no way to find them after the inode |
201 | * is reaped and thus user could see stale data if he tries to | 201 | * is reaped and thus user could see stale data if he tries to |
202 | * read them before the transaction is checkpointed. So be | 202 | * read them before the transaction is checkpointed. So be |
203 | * careful and force everything to disk here... We use | 203 | * careful and force everything to disk here... We use |
204 | * ei->i_datasync_tid to store the newest transaction | 204 | * ei->i_datasync_tid to store the newest transaction |
205 | * containing inode's data. | 205 | * containing inode's data. |
206 | * | 206 | * |
207 | * Note that directories do not have this problem because they | 207 | * Note that directories do not have this problem because they |
208 | * don't use page cache. | 208 | * don't use page cache. |
209 | */ | 209 | */ |
210 | if (ext4_should_journal_data(inode) && | 210 | if (ext4_should_journal_data(inode) && |
211 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && | 211 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && |
212 | inode->i_ino != EXT4_JOURNAL_INO) { | 212 | inode->i_ino != EXT4_JOURNAL_INO) { |
213 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 213 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
214 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; | 214 | tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; |
215 | 215 | ||
216 | jbd2_complete_transaction(journal, commit_tid); | 216 | jbd2_complete_transaction(journal, commit_tid); |
217 | filemap_write_and_wait(&inode->i_data); | 217 | filemap_write_and_wait(&inode->i_data); |
218 | } | 218 | } |
219 | truncate_inode_pages_final(&inode->i_data); | 219 | truncate_inode_pages_final(&inode->i_data); |
220 | 220 | ||
221 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 221 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
222 | goto no_delete; | 222 | goto no_delete; |
223 | } | 223 | } |
224 | 224 | ||
225 | if (is_bad_inode(inode)) | 225 | if (is_bad_inode(inode)) |
226 | goto no_delete; | 226 | goto no_delete; |
227 | dquot_initialize(inode); | 227 | dquot_initialize(inode); |
228 | 228 | ||
229 | if (ext4_should_order_data(inode)) | 229 | if (ext4_should_order_data(inode)) |
230 | ext4_begin_ordered_truncate(inode, 0); | 230 | ext4_begin_ordered_truncate(inode, 0); |
231 | truncate_inode_pages_final(&inode->i_data); | 231 | truncate_inode_pages_final(&inode->i_data); |
232 | 232 | ||
233 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 233 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Protect us against freezing - iput() caller didn't have to have any | 236 | * Protect us against freezing - iput() caller didn't have to have any |
237 | * protection against it | 237 | * protection against it |
238 | */ | 238 | */ |
239 | sb_start_intwrite(inode->i_sb); | 239 | sb_start_intwrite(inode->i_sb); |
240 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, | 240 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, |
241 | ext4_blocks_for_truncate(inode)+3); | 241 | ext4_blocks_for_truncate(inode)+3); |
242 | if (IS_ERR(handle)) { | 242 | if (IS_ERR(handle)) { |
243 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); | 243 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); |
244 | /* | 244 | /* |
245 | * If we're going to skip the normal cleanup, we still need to | 245 | * If we're going to skip the normal cleanup, we still need to |
246 | * make sure that the in-core orphan linked list is properly | 246 | * make sure that the in-core orphan linked list is properly |
247 | * cleaned up. | 247 | * cleaned up. |
248 | */ | 248 | */ |
249 | ext4_orphan_del(NULL, inode); | 249 | ext4_orphan_del(NULL, inode); |
250 | sb_end_intwrite(inode->i_sb); | 250 | sb_end_intwrite(inode->i_sb); |
251 | goto no_delete; | 251 | goto no_delete; |
252 | } | 252 | } |
253 | 253 | ||
254 | if (IS_SYNC(inode)) | 254 | if (IS_SYNC(inode)) |
255 | ext4_handle_sync(handle); | 255 | ext4_handle_sync(handle); |
256 | inode->i_size = 0; | 256 | inode->i_size = 0; |
257 | err = ext4_mark_inode_dirty(handle, inode); | 257 | err = ext4_mark_inode_dirty(handle, inode); |
258 | if (err) { | 258 | if (err) { |
259 | ext4_warning(inode->i_sb, | 259 | ext4_warning(inode->i_sb, |
260 | "couldn't mark inode dirty (err %d)", err); | 260 | "couldn't mark inode dirty (err %d)", err); |
261 | goto stop_handle; | 261 | goto stop_handle; |
262 | } | 262 | } |
263 | if (inode->i_blocks) | 263 | if (inode->i_blocks) |
264 | ext4_truncate(inode); | 264 | ext4_truncate(inode); |
265 | 265 | ||
266 | /* | 266 | /* |
267 | * ext4_ext_truncate() doesn't reserve any slop when it | 267 | * ext4_ext_truncate() doesn't reserve any slop when it |
268 | * restarts journal transactions; therefore there may not be | 268 | * restarts journal transactions; therefore there may not be |
269 | * enough credits left in the handle to remove the inode from | 269 | * enough credits left in the handle to remove the inode from |
270 | * the orphan list and set the dtime field. | 270 | * the orphan list and set the dtime field. |
271 | */ | 271 | */ |
272 | if (!ext4_handle_has_enough_credits(handle, 3)) { | 272 | if (!ext4_handle_has_enough_credits(handle, 3)) { |
273 | err = ext4_journal_extend(handle, 3); | 273 | err = ext4_journal_extend(handle, 3); |
274 | if (err > 0) | 274 | if (err > 0) |
275 | err = ext4_journal_restart(handle, 3); | 275 | err = ext4_journal_restart(handle, 3); |
276 | if (err != 0) { | 276 | if (err != 0) { |
277 | ext4_warning(inode->i_sb, | 277 | ext4_warning(inode->i_sb, |
278 | "couldn't extend journal (err %d)", err); | 278 | "couldn't extend journal (err %d)", err); |
279 | stop_handle: | 279 | stop_handle: |
280 | ext4_journal_stop(handle); | 280 | ext4_journal_stop(handle); |
281 | ext4_orphan_del(NULL, inode); | 281 | ext4_orphan_del(NULL, inode); |
282 | sb_end_intwrite(inode->i_sb); | 282 | sb_end_intwrite(inode->i_sb); |
283 | goto no_delete; | 283 | goto no_delete; |
284 | } | 284 | } |
285 | } | 285 | } |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * Kill off the orphan record which ext4_truncate created. | 288 | * Kill off the orphan record which ext4_truncate created. |
289 | * AKPM: I think this can be inside the above `if'. | 289 | * AKPM: I think this can be inside the above `if'. |
290 | * Note that ext4_orphan_del() has to be able to cope with the | 290 | * Note that ext4_orphan_del() has to be able to cope with the |
291 | * deletion of a non-existent orphan - this is because we don't | 291 | * deletion of a non-existent orphan - this is because we don't |
292 | * know if ext4_truncate() actually created an orphan record. | 292 | * know if ext4_truncate() actually created an orphan record. |
293 | * (Well, we could do this if we need to, but heck - it works) | 293 | * (Well, we could do this if we need to, but heck - it works) |
294 | */ | 294 | */ |
295 | ext4_orphan_del(handle, inode); | 295 | ext4_orphan_del(handle, inode); |
296 | EXT4_I(inode)->i_dtime = get_seconds(); | 296 | EXT4_I(inode)->i_dtime = get_seconds(); |
297 | 297 | ||
298 | /* | 298 | /* |
299 | * One subtle ordering requirement: if anything has gone wrong | 299 | * One subtle ordering requirement: if anything has gone wrong |
300 | * (transaction abort, IO errors, whatever), then we can still | 300 | * (transaction abort, IO errors, whatever), then we can still |
301 | * do these next steps (the fs will already have been marked as | 301 | * do these next steps (the fs will already have been marked as |
302 | * having errors), but we can't free the inode if the mark_dirty | 302 | * having errors), but we can't free the inode if the mark_dirty |
303 | * fails. | 303 | * fails. |
304 | */ | 304 | */ |
305 | if (ext4_mark_inode_dirty(handle, inode)) | 305 | if (ext4_mark_inode_dirty(handle, inode)) |
306 | /* If that failed, just do the required in-core inode clear. */ | 306 | /* If that failed, just do the required in-core inode clear. */ |
307 | ext4_clear_inode(inode); | 307 | ext4_clear_inode(inode); |
308 | else | 308 | else |
309 | ext4_free_inode(handle, inode); | 309 | ext4_free_inode(handle, inode); |
310 | ext4_journal_stop(handle); | 310 | ext4_journal_stop(handle); |
311 | sb_end_intwrite(inode->i_sb); | 311 | sb_end_intwrite(inode->i_sb); |
312 | return; | 312 | return; |
313 | no_delete: | 313 | no_delete: |
314 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ | 314 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
315 | } | 315 | } |
316 | 316 | ||
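ext4_evict_inode() above also demonstrates a common jbd2 credit idiom: before the final orphan-list and dtime updates it checks whether the handle still has enough credits, tries to extend it in place, and only restarts the transaction when the journal refuses the extension. A hedged sketch of that idiom as a hypothetical helper (the ext4_* calls are exactly the ones used above; ext4_journal_extend() returns > 0 when it cannot extend in place):

/*
 * Hypothetical helper sketching the extend-or-restart idiom used in
 * ext4_evict_inode(): make sure the handle still has `needed`
 * credits, restarting the transaction if it cannot be extended.
 */
static int ensure_handle_credits(handle_t *handle, struct inode *inode,
				 int needed)
{
	int err = 0;

	if (!ext4_handle_has_enough_credits(handle, needed)) {
		err = ext4_journal_extend(handle, needed);
		if (err > 0)	/* extend refused: commit and restart */
			err = ext4_journal_restart(handle, needed);
		if (err)
			ext4_warning(inode->i_sb,
				     "couldn't extend journal (err %d)", err);
	}
	return err;
}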
317 | #ifdef CONFIG_QUOTA | 317 | #ifdef CONFIG_QUOTA |
318 | qsize_t *ext4_get_reserved_space(struct inode *inode) | 318 | qsize_t *ext4_get_reserved_space(struct inode *inode) |
319 | { | 319 | { |
320 | return &EXT4_I(inode)->i_reserved_quota; | 320 | return &EXT4_I(inode)->i_reserved_quota; |
321 | } | 321 | } |
322 | #endif | 322 | #endif |
323 | 323 | ||
324 | /* | 324 | /* |
325 | * Called with i_data_sem down, which is important since we can call | 325 | * Called with i_data_sem down, which is important since we can call |
326 | * ext4_discard_preallocations() from here. | 326 | * ext4_discard_preallocations() from here. |
327 | */ | 327 | */ |
328 | void ext4_da_update_reserve_space(struct inode *inode, | 328 | void ext4_da_update_reserve_space(struct inode *inode, |
329 | int used, int quota_claim) | 329 | int used, int quota_claim) |
330 | { | 330 | { |
331 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 331 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
332 | struct ext4_inode_info *ei = EXT4_I(inode); | 332 | struct ext4_inode_info *ei = EXT4_I(inode); |
333 | 333 | ||
334 | spin_lock(&ei->i_block_reservation_lock); | 334 | spin_lock(&ei->i_block_reservation_lock); |
335 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); | 335 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
336 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 336 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
337 | ext4_warning(inode->i_sb, "%s: ino %lu, used %d " | 337 | ext4_warning(inode->i_sb, "%s: ino %lu, used %d " |
338 | "with only %d reserved data blocks", | 338 | "with only %d reserved data blocks", |
339 | __func__, inode->i_ino, used, | 339 | __func__, inode->i_ino, used, |
340 | ei->i_reserved_data_blocks); | 340 | ei->i_reserved_data_blocks); |
341 | WARN_ON(1); | 341 | WARN_ON(1); |
342 | used = ei->i_reserved_data_blocks; | 342 | used = ei->i_reserved_data_blocks; |
343 | } | 343 | } |
344 | 344 | ||
345 | /* Update per-inode reservations */ | 345 | /* Update per-inode reservations */ |
346 | ei->i_reserved_data_blocks -= used; | 346 | ei->i_reserved_data_blocks -= used; |
347 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, used); | 347 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, used); |
348 | 348 | ||
349 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 349 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
350 | 350 | ||
351 | /* Update quota subsystem for data blocks */ | 351 | /* Update quota subsystem for data blocks */ |
352 | if (quota_claim) | 352 | if (quota_claim) |
353 | dquot_claim_block(inode, EXT4_C2B(sbi, used)); | 353 | dquot_claim_block(inode, EXT4_C2B(sbi, used)); |
354 | else { | 354 | else { |
355 | /* | 355 | /* |
356 | * We did fallocate with an offset that is already delayed | 356 | * We did fallocate with an offset that is already delayed |
357 | * allocated. So on delayed allocated writeback we should | 357 | * allocated. So on delayed allocated writeback we should |
358 | * not re-claim the quota for fallocated blocks. | 358 | * not re-claim the quota for fallocated blocks. |
359 | */ | 359 | */ |
360 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); | 360 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); |
361 | } | 361 | } |
362 | 362 | ||
363 | /* | 363 | /* |
364 | * If we have done all the pending block allocations and if | 364 | * If we have done all the pending block allocations and if |
365 | * there aren't any writers on the inode, we can discard the | 365 | * there aren't any writers on the inode, we can discard the |
366 | * inode's preallocations. | 366 | * inode's preallocations. |
367 | */ | 367 | */ |
368 | if ((ei->i_reserved_data_blocks == 0) && | 368 | if ((ei->i_reserved_data_blocks == 0) && |
369 | (atomic_read(&inode->i_writecount) == 0)) | 369 | (atomic_read(&inode->i_writecount) == 0)) |
370 | ext4_discard_preallocations(inode); | 370 | ext4_discard_preallocations(inode); |
371 | } | 371 | } |
372 | 372 | ||
373 | static int __check_block_validity(struct inode *inode, const char *func, | 373 | static int __check_block_validity(struct inode *inode, const char *func, |
374 | unsigned int line, | 374 | unsigned int line, |
375 | struct ext4_map_blocks *map) | 375 | struct ext4_map_blocks *map) |
376 | { | 376 | { |
377 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, | 377 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
378 | map->m_len)) { | 378 | map->m_len)) { |
379 | ext4_error_inode(inode, func, line, map->m_pblk, | 379 | ext4_error_inode(inode, func, line, map->m_pblk, |
380 | "lblock %lu mapped to illegal pblock " | 380 | "lblock %lu mapped to illegal pblock " |
381 | "(length %d)", (unsigned long) map->m_lblk, | 381 | "(length %d)", (unsigned long) map->m_lblk, |
382 | map->m_len); | 382 | map->m_len); |
383 | return -EIO; | 383 | return -EIO; |
384 | } | 384 | } |
385 | return 0; | 385 | return 0; |
386 | } | 386 | } |
387 | 387 | ||
388 | #define check_block_validity(inode, map) \ | 388 | #define check_block_validity(inode, map) \ |
389 | __check_block_validity((inode), __func__, __LINE__, (map)) | 389 | __check_block_validity((inode), __func__, __LINE__, (map)) |
390 | 390 | ||
391 | #ifdef ES_AGGRESSIVE_TEST | 391 | #ifdef ES_AGGRESSIVE_TEST |
392 | static void ext4_map_blocks_es_recheck(handle_t *handle, | 392 | static void ext4_map_blocks_es_recheck(handle_t *handle, |
393 | struct inode *inode, | 393 | struct inode *inode, |
394 | struct ext4_map_blocks *es_map, | 394 | struct ext4_map_blocks *es_map, |
395 | struct ext4_map_blocks *map, | 395 | struct ext4_map_blocks *map, |
396 | int flags) | 396 | int flags) |
397 | { | 397 | { |
398 | int retval; | 398 | int retval; |
399 | 399 | ||
400 | map->m_flags = 0; | 400 | map->m_flags = 0; |
401 | /* | 401 | /* |
402 | * There is a race window in which the result is not the same, | 402 | * There is a race window in which the result is not the same, |
403 | * e.g. xfstests #223 when dioread_nolock is enabled. The reason | 403 | * e.g. xfstests #223 when dioread_nolock is enabled. The reason |
404 | * is that we look up a block mapping in the extent status tree | 404 | * is that we look up a block mapping in the extent status tree |
405 | * without taking i_data_sem. So in the meantime the unwritten | 405 | * without taking i_data_sem. So in the meantime the unwritten |
406 | * extent could be converted. | 406 | * extent could be converted. |
407 | */ | 407 | */ |
408 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 408 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
409 | down_read(&EXT4_I(inode)->i_data_sem); | 409 | down_read(&EXT4_I(inode)->i_data_sem); |
410 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 410 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
411 | retval = ext4_ext_map_blocks(handle, inode, map, flags & | 411 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
412 | EXT4_GET_BLOCKS_KEEP_SIZE); | 412 | EXT4_GET_BLOCKS_KEEP_SIZE); |
413 | } else { | 413 | } else { |
414 | retval = ext4_ind_map_blocks(handle, inode, map, flags & | 414 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
415 | EXT4_GET_BLOCKS_KEEP_SIZE); | 415 | EXT4_GET_BLOCKS_KEEP_SIZE); |
416 | } | 416 | } |
417 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 417 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
418 | up_read((&EXT4_I(inode)->i_data_sem)); | 418 | up_read((&EXT4_I(inode)->i_data_sem)); |
419 | 419 | ||
420 | /* | 420 | /* |
421 | * We don't check m_len because the extent will be collapsed in the | 421 | * We don't check m_len because the extent will be collapsed in the |
422 | * status tree, so the lengths might not be equal. | 422 | * status tree, so the lengths might not be equal. |
423 | */ | 423 | */ |
424 | if (es_map->m_lblk != map->m_lblk || | 424 | if (es_map->m_lblk != map->m_lblk || |
425 | es_map->m_flags != map->m_flags || | 425 | es_map->m_flags != map->m_flags || |
426 | es_map->m_pblk != map->m_pblk) { | 426 | es_map->m_pblk != map->m_pblk) { |
427 | printk("ES cache assertion failed for inode: %lu " | 427 | printk("ES cache assertion failed for inode: %lu " |
428 | "es_cached ex [%d/%d/%llu/%x] != " | 428 | "es_cached ex [%d/%d/%llu/%x] != " |
429 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", | 429 | "found ex [%d/%d/%llu/%x] retval %d flags %x\n", |
430 | inode->i_ino, es_map->m_lblk, es_map->m_len, | 430 | inode->i_ino, es_map->m_lblk, es_map->m_len, |
431 | es_map->m_pblk, es_map->m_flags, map->m_lblk, | 431 | es_map->m_pblk, es_map->m_flags, map->m_lblk, |
432 | map->m_len, map->m_pblk, map->m_flags, | 432 | map->m_len, map->m_pblk, map->m_flags, |
433 | retval, flags); | 433 | retval, flags); |
434 | } | 434 | } |
435 | } | 435 | } |
436 | #endif /* ES_AGGRESSIVE_TEST */ | 436 | #endif /* ES_AGGRESSIVE_TEST */ |
437 | 437 | ||
438 | /* | 438 | /* |
439 | * The ext4_map_blocks() function tries to look up the requested blocks | 439 | * The ext4_map_blocks() function tries to look up the requested blocks |
440 | * and returns the existing mapping if the blocks are already mapped. | 440 | * and returns the existing mapping if the blocks are already mapped. |
441 | * | 441 | * |
442 | * Otherwise it takes the write lock of i_data_sem, allocates blocks, | 442 | * Otherwise it takes the write lock of i_data_sem, allocates blocks, |
443 | * stores the allocated blocks in the result buffer head, and marks it | 443 | * stores the allocated blocks in the result buffer head, and marks it |
444 | * mapped. | 444 | * mapped. |
445 | * | 445 | * |
446 | * If the file is extent based, it calls ext4_ext_map_blocks(); | 446 | * If the file is extent based, it calls ext4_ext_map_blocks(); |
447 | * otherwise, it calls ext4_ind_map_blocks() to handle indirect-mapped | 447 | * otherwise, it calls ext4_ind_map_blocks() to handle indirect-mapped |
448 | * files. | 448 | * files. |
449 | * | 449 | * |
450 | * On success, it returns the number of blocks mapped or allocated. | 450 | * On success, it returns the number of blocks mapped or allocated. |
451 | * If create == 0 and the blocks are pre-allocated and unwritten, | 451 | * If create == 0 and the blocks are pre-allocated and unwritten, |
452 | * the result buffer head is unmapped. If create == 1, it makes sure | 452 | * the result buffer head is unmapped. If create == 1, it makes sure |
453 | * the buffer head is mapped. | 453 | * the buffer head is mapped. |
454 | * | 454 | * |
455 | * It returns 0 if a plain lookup failed (blocks have not been | 455 | * It returns 0 if a plain lookup failed (blocks have not been |
456 | * allocated); in that case, the buffer head is unmapped. | 456 | * allocated); in that case, the buffer head is unmapped. |
457 | * | 457 | * |
458 | * It returns an error in case of allocation failure. | 458 | * It returns an error in case of allocation failure. |
459 | */ | 459 | */ |
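Before the function itself, a hedged caller-side illustration of the contract just described (hypothetical caller, not part of this patch): with no EXT4_GET_BLOCKS_CREATE flag the call is a pure lookup, a positive return is the number of blocks mapped, and zero means a hole or a delayed allocation.

/* Hypothetical lookup-only caller of ext4_map_blocks(). */
static void sample_lookup(struct inode *inode)
{
	struct ext4_map_blocks map = { .m_lblk = 0, .m_len = 8 };
	int ret = ext4_map_blocks(NULL, inode, &map, 0); /* no CREATE */

	if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED)) {
		/* blocks map.m_pblk .. map.m_pblk + ret - 1 are on disk */
	} else if (ret == 0) {
		/* hole or delayed extent: nothing allocated yet */
	} else {
		/* ret < 0: error (e.g. -EIO for an invalid mapping) */
	}
}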
460 | int ext4_map_blocks(handle_t *handle, struct inode *inode, | 460 | int ext4_map_blocks(handle_t *handle, struct inode *inode, |
461 | struct ext4_map_blocks *map, int flags) | 461 | struct ext4_map_blocks *map, int flags) |
462 | { | 462 | { |
463 | struct extent_status es; | 463 | struct extent_status es; |
464 | int retval; | 464 | int retval; |
465 | int ret = 0; | 465 | int ret = 0; |
466 | #ifdef ES_AGGRESSIVE_TEST | 466 | #ifdef ES_AGGRESSIVE_TEST |
467 | struct ext4_map_blocks orig_map; | 467 | struct ext4_map_blocks orig_map; |
468 | 468 | ||
469 | memcpy(&orig_map, map, sizeof(*map)); | 469 | memcpy(&orig_map, map, sizeof(*map)); |
470 | #endif | 470 | #endif |
471 | 471 | ||
472 | map->m_flags = 0; | 472 | map->m_flags = 0; |
473 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," | 473 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
474 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | 474 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
475 | (unsigned long) map->m_lblk); | 475 | (unsigned long) map->m_lblk); |
476 | 476 | ||
477 | /* | 477 | /* |
478 | * ext4_map_blocks returns an int, and m_len is an unsigned int | 478 | * ext4_map_blocks returns an int, and m_len is an unsigned int |
479 | */ | 479 | */ |
480 | if (unlikely(map->m_len > INT_MAX)) | 480 | if (unlikely(map->m_len > INT_MAX)) |
481 | map->m_len = INT_MAX; | 481 | map->m_len = INT_MAX; |
482 | 482 | ||
483 | /* We can only handle block numbers less than EXT_MAX_BLOCKS */ | 483 | /* We can only handle block numbers less than EXT_MAX_BLOCKS */ |
484 | if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) | 484 | if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) |
485 | return -EIO; | 485 | return -EIO; |
486 | 486 | ||
487 | /* Lookup extent status tree firstly */ | 487 | /* Lookup extent status tree firstly */ |
488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 488 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
489 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 489 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
490 | map->m_pblk = ext4_es_pblock(&es) + | 490 | map->m_pblk = ext4_es_pblock(&es) + |
491 | map->m_lblk - es.es_lblk; | 491 | map->m_lblk - es.es_lblk; |
492 | map->m_flags |= ext4_es_is_written(&es) ? | 492 | map->m_flags |= ext4_es_is_written(&es) ? |
493 | EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN; | 493 | EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN; |
494 | retval = es.es_len - (map->m_lblk - es.es_lblk); | 494 | retval = es.es_len - (map->m_lblk - es.es_lblk); |
495 | if (retval > map->m_len) | 495 | if (retval > map->m_len) |
496 | retval = map->m_len; | 496 | retval = map->m_len; |
497 | map->m_len = retval; | 497 | map->m_len = retval; |
498 | } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { | 498 | } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { |
499 | retval = 0; | 499 | retval = 0; |
500 | } else { | 500 | } else { |
501 | BUG_ON(1); | 501 | BUG_ON(1); |
502 | } | 502 | } |
503 | #ifdef ES_AGGRESSIVE_TEST | 503 | #ifdef ES_AGGRESSIVE_TEST |
504 | ext4_map_blocks_es_recheck(handle, inode, map, | 504 | ext4_map_blocks_es_recheck(handle, inode, map, |
505 | &orig_map, flags); | 505 | &orig_map, flags); |
506 | #endif | 506 | #endif |
507 | goto found; | 507 | goto found; |
508 | } | 508 | } |
509 | 509 | ||
510 | /* | 510 | /* |
511 | * Try to see if we can get the block without requesting a new | 511 | * Try to see if we can get the block without requesting a new |
512 | * file system block. | 512 | * file system block. |
513 | */ | 513 | */ |
514 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 514 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
515 | down_read(&EXT4_I(inode)->i_data_sem); | 515 | down_read(&EXT4_I(inode)->i_data_sem); |
516 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 516 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
517 | retval = ext4_ext_map_blocks(handle, inode, map, flags & | 517 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
518 | EXT4_GET_BLOCKS_KEEP_SIZE); | 518 | EXT4_GET_BLOCKS_KEEP_SIZE); |
519 | } else { | 519 | } else { |
520 | retval = ext4_ind_map_blocks(handle, inode, map, flags & | 520 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
521 | EXT4_GET_BLOCKS_KEEP_SIZE); | 521 | EXT4_GET_BLOCKS_KEEP_SIZE); |
522 | } | 522 | } |
523 | if (retval > 0) { | 523 | if (retval > 0) { |
524 | unsigned int status; | 524 | unsigned int status; |
525 | 525 | ||
526 | if (unlikely(retval != map->m_len)) { | 526 | if (unlikely(retval != map->m_len)) { |
527 | ext4_warning(inode->i_sb, | 527 | ext4_warning(inode->i_sb, |
528 | "ES len assertion failed for inode " | 528 | "ES len assertion failed for inode " |
529 | "%lu: retval %d != map->m_len %d", | 529 | "%lu: retval %d != map->m_len %d", |
530 | inode->i_ino, retval, map->m_len); | 530 | inode->i_ino, retval, map->m_len); |
531 | WARN_ON(1); | 531 | WARN_ON(1); |
532 | } | 532 | } |
533 | 533 | ||
534 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 534 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
535 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 535 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
536 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && | 536 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && |
537 | ext4_find_delalloc_range(inode, map->m_lblk, | 537 | ext4_find_delalloc_range(inode, map->m_lblk, |
538 | map->m_lblk + map->m_len - 1)) | 538 | map->m_lblk + map->m_len - 1)) |
539 | status |= EXTENT_STATUS_DELAYED; | 539 | status |= EXTENT_STATUS_DELAYED; |
540 | ret = ext4_es_insert_extent(inode, map->m_lblk, | 540 | ret = ext4_es_insert_extent(inode, map->m_lblk, |
541 | map->m_len, map->m_pblk, status); | 541 | map->m_len, map->m_pblk, status); |
542 | if (ret < 0) | 542 | if (ret < 0) |
543 | retval = ret; | 543 | retval = ret; |
544 | } | 544 | } |
545 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) | 545 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
546 | up_read((&EXT4_I(inode)->i_data_sem)); | 546 | up_read((&EXT4_I(inode)->i_data_sem)); |
547 | 547 | ||
548 | found: | 548 | found: |
549 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 549 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
550 | ret = check_block_validity(inode, map); | 550 | ret = check_block_validity(inode, map); |
551 | if (ret != 0) | 551 | if (ret != 0) |
552 | return ret; | 552 | return ret; |
553 | } | 553 | } |
554 | 554 | ||
555 | /* If this is only a block(s) lookup */ | 555 | /* If this is only a block(s) lookup */ |
556 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) | 556 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) |
557 | return retval; | 557 | return retval; |
558 | 558 | ||
559 | /* | 559 | /* |
560 | * Return if the blocks have already been allocated. | 560 | * Return if the blocks have already been allocated. |
561 | * | 561 | * |
562 | * Note that if blocks have been preallocated, | 562 | * Note that if blocks have been preallocated, |
563 | * ext4_ext_get_block() returns with create = 0 | 563 | * ext4_ext_get_block() returns with create = 0 |
564 | * and the buffer head unmapped. | 564 | * and the buffer head unmapped. |
565 | */ | 565 | */ |
566 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 566 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
567 | /* | 567 | /* |
568 | * If we need to convert extent to unwritten | 568 | * If we need to convert extent to unwritten |
569 | * we continue and do the actual work in | 569 | * we continue and do the actual work in |
570 | * ext4_ext_map_blocks() | 570 | * ext4_ext_map_blocks() |
571 | */ | 571 | */ |
572 | if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) | 572 | if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) |
573 | return retval; | 573 | return retval; |
574 | 574 | ||
575 | /* | 575 | /* |
576 | * Here we clear m_flags because after allocating a new extent, | 576 | * Here we clear m_flags because after allocating a new extent, |
577 | * it will be set again. | 577 | * it will be set again. |
578 | */ | 578 | */ |
579 | map->m_flags &= ~EXT4_MAP_FLAGS; | 579 | map->m_flags &= ~EXT4_MAP_FLAGS; |
580 | 580 | ||
581 | /* | 581 | /* |
582 | * New block allocation and/or writing to an unwritten extent | 582 | * New block allocation and/or writing to an unwritten extent |
583 | * will possibly result in updating i_data, so we take | 583 | * will possibly result in updating i_data, so we take |
584 | * the write lock of i_data_sem and call get_block() | 584 | * the write lock of i_data_sem and call get_block() |
585 | * with the create == 1 flag. | 585 | * with the create == 1 flag. |
586 | */ | 586 | */ |
587 | down_write(&EXT4_I(inode)->i_data_sem); | 587 | down_write(&EXT4_I(inode)->i_data_sem); |
588 | 588 | ||
589 | /* | 589 | /* |
590 | * We need to re-check the extents flag here because migrate | 590 | * We need to re-check the extents flag here because migrate |
591 | * could have changed the inode type in between | 591 | * could have changed the inode type in between |
592 | */ | 592 | */ |
593 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 593 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
594 | retval = ext4_ext_map_blocks(handle, inode, map, flags); | 594 | retval = ext4_ext_map_blocks(handle, inode, map, flags); |
595 | } else { | 595 | } else { |
596 | retval = ext4_ind_map_blocks(handle, inode, map, flags); | 596 | retval = ext4_ind_map_blocks(handle, inode, map, flags); |
597 | 597 | ||
598 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { | 598 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { |
599 | /* | 599 | /* |
600 | * We allocated new blocks which will result in | 600 | * We allocated new blocks which will result in |
601 | * i_data's format changing. Force the migrate | 601 | * i_data's format changing. Force the migrate |
602 | * to fail by clearing migrate flags | 602 | * to fail by clearing migrate flags |
603 | */ | 603 | */ |
604 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); | 604 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
605 | } | 605 | } |
606 | 606 | ||
607 | /* | 607 | /* |
608 | * Update reserved blocks/metadata blocks after successful | 608 | * Update reserved blocks/metadata blocks after successful |
609 | * block allocation which had been deferred till now. We don't | 609 | * block allocation which had been deferred till now. We don't |
610 | * support fallocate for non-extent files, so we can update | 610 | * support fallocate for non-extent files, so we can update |
611 | * the reserved space here. | 611 | * the reserved space here. |
612 | */ | 612 | */ |
613 | if ((retval > 0) && | 613 | if ((retval > 0) && |
614 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 614 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
615 | ext4_da_update_reserve_space(inode, retval, 1); | 615 | ext4_da_update_reserve_space(inode, retval, 1); |
616 | } | 616 | } |
617 | 617 | ||
618 | if (retval > 0) { | 618 | if (retval > 0) { |
619 | unsigned int status; | 619 | unsigned int status; |
620 | 620 | ||
621 | if (unlikely(retval != map->m_len)) { | 621 | if (unlikely(retval != map->m_len)) { |
622 | ext4_warning(inode->i_sb, | 622 | ext4_warning(inode->i_sb, |
623 | "ES len assertion failed for inode " | 623 | "ES len assertion failed for inode " |
624 | "%lu: retval %d != map->m_len %d", | 624 | "%lu: retval %d != map->m_len %d", |
625 | inode->i_ino, retval, map->m_len); | 625 | inode->i_ino, retval, map->m_len); |
626 | WARN_ON(1); | 626 | WARN_ON(1); |
627 | } | 627 | } |
628 | 628 | ||
629 | /* | 629 | /* |
630 | * If the extent has been zeroed out, we don't need to update | 630 | * If the extent has been zeroed out, we don't need to update |
631 | * the extent status tree. | 631 | * the extent status tree. |
632 | */ | 632 | */ |
633 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && | 633 | if ((flags & EXT4_GET_BLOCKS_PRE_IO) && |
634 | ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 634 | ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
635 | if (ext4_es_is_written(&es)) | 635 | if (ext4_es_is_written(&es)) |
636 | goto has_zeroout; | 636 | goto has_zeroout; |
637 | } | 637 | } |
638 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 638 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
639 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 639 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
640 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && | 640 | if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && |
641 | ext4_find_delalloc_range(inode, map->m_lblk, | 641 | ext4_find_delalloc_range(inode, map->m_lblk, |
642 | map->m_lblk + map->m_len - 1)) | 642 | map->m_lblk + map->m_len - 1)) |
643 | status |= EXTENT_STATUS_DELAYED; | 643 | status |= EXTENT_STATUS_DELAYED; |
644 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, | 644 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
645 | map->m_pblk, status); | 645 | map->m_pblk, status); |
646 | if (ret < 0) | 646 | if (ret < 0) |
647 | retval = ret; | 647 | retval = ret; |
648 | } | 648 | } |
649 | 649 | ||
650 | has_zeroout: | 650 | has_zeroout: |
651 | up_write((&EXT4_I(inode)->i_data_sem)); | 651 | up_write((&EXT4_I(inode)->i_data_sem)); |
652 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 652 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
653 | ret = check_block_validity(inode, map); | 653 | ret = check_block_validity(inode, map); |
654 | if (ret != 0) | 654 | if (ret != 0) |
655 | return ret; | 655 | return ret; |
656 | } | 656 | } |
657 | return retval; | 657 | return retval; |
658 | } | 658 | } |
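/*
 * To recap the pattern above: the lookup runs under the read side of
 * i_data_sem (unless EXT4_GET_BLOCKS_NO_LOCK is set); only when the
 * caller asked for allocation (EXT4_GET_BLOCKS_CREATE) and the lookup
 * did not produce a usable mapping do we retake i_data_sem for
 * writing and do the real allocation.
 */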
659 | 659 | ||
660 | static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) | 660 | static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate) |
661 | { | 661 | { |
662 | struct inode *inode = bh->b_assoc_map->host; | 662 | struct inode *inode = bh->b_assoc_map->host; |
663 | /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ | 663 | /* XXX: breaks on 32-bit > 16GB. Is that even supported? */ |
664 | loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; | 664 | loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits; |
665 | int err; | 665 | int err; |
666 | if (!uptodate) | 666 | if (!uptodate) |
667 | return; | 667 | return; |
668 | WARN_ON(!buffer_unwritten(bh)); | 668 | WARN_ON(!buffer_unwritten(bh)); |
669 | err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); | 669 | err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size); |
670 | } | 670 | } |
671 | 671 | ||
672 | /* Maximum number of blocks we map for direct IO at once. */ | 672 | /* Maximum number of blocks we map for direct IO at once. */ |
673 | #define DIO_MAX_BLOCKS 4096 | 673 | #define DIO_MAX_BLOCKS 4096 |
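/*
 * This cap is what bounds the journal handle below: the DIO write
 * path sizes its transaction with ext4_chunk_trans_blocks(inode,
 * map.m_len), so limiting m_len to DIO_MAX_BLOCKS keeps the credit
 * request for a single mapping call to a sane size.
 */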
674 | 674 | ||
675 | static int _ext4_get_block(struct inode *inode, sector_t iblock, | 675 | static int _ext4_get_block(struct inode *inode, sector_t iblock, |
676 | struct buffer_head *bh, int flags) | 676 | struct buffer_head *bh, int flags) |
677 | { | 677 | { |
678 | handle_t *handle = ext4_journal_current_handle(); | 678 | handle_t *handle = ext4_journal_current_handle(); |
679 | struct ext4_map_blocks map; | 679 | struct ext4_map_blocks map; |
680 | int ret = 0, started = 0; | 680 | int ret = 0, started = 0; |
681 | int dio_credits; | 681 | int dio_credits; |
682 | 682 | ||
683 | if (ext4_has_inline_data(inode)) | 683 | if (ext4_has_inline_data(inode)) |
684 | return -ERANGE; | 684 | return -ERANGE; |
685 | 685 | ||
686 | map.m_lblk = iblock; | 686 | map.m_lblk = iblock; |
687 | map.m_len = bh->b_size >> inode->i_blkbits; | 687 | map.m_len = bh->b_size >> inode->i_blkbits; |
688 | 688 | ||
689 | if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { | 689 | if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { |
690 | /* Direct IO write... */ | 690 | /* Direct IO write... */ |
691 | if (map.m_len > DIO_MAX_BLOCKS) | 691 | if (map.m_len > DIO_MAX_BLOCKS) |
692 | map.m_len = DIO_MAX_BLOCKS; | 692 | map.m_len = DIO_MAX_BLOCKS; |
693 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); | 693 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); |
694 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | 694 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
695 | dio_credits); | 695 | dio_credits); |
696 | if (IS_ERR(handle)) { | 696 | if (IS_ERR(handle)) { |
697 | ret = PTR_ERR(handle); | 697 | ret = PTR_ERR(handle); |
698 | return ret; | 698 | return ret; |
699 | } | 699 | } |
700 | started = 1; | 700 | started = 1; |
701 | } | 701 | } |
702 | 702 | ||
703 | ret = ext4_map_blocks(handle, inode, &map, flags); | 703 | ret = ext4_map_blocks(handle, inode, &map, flags); |
704 | if (ret > 0) { | 704 | if (ret > 0) { |
705 | ext4_io_end_t *io_end = ext4_inode_aio(inode); | 705 | ext4_io_end_t *io_end = ext4_inode_aio(inode); |
706 | 706 | ||
707 | map_bh(bh, inode->i_sb, map.m_pblk); | 707 | map_bh(bh, inode->i_sb, map.m_pblk); |
708 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 708 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
709 | if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { | 709 | if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { |
710 | bh->b_assoc_map = inode->i_mapping; | 710 | bh->b_assoc_map = inode->i_mapping; |
711 | bh->b_private = (void *)(unsigned long)iblock; | 711 | bh->b_private = (void *)(unsigned long)iblock; |
712 | bh->b_end_io = ext4_end_io_unwritten; | 712 | bh->b_end_io = ext4_end_io_unwritten; |
713 | } | 713 | } |
714 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 714 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
715 | set_buffer_defer_completion(bh); | 715 | set_buffer_defer_completion(bh); |
716 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 716 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
717 | ret = 0; | 717 | ret = 0; |
718 | } | 718 | } |
719 | if (started) | 719 | if (started) |
720 | ext4_journal_stop(handle); | 720 | ext4_journal_stop(handle); |
721 | return ret; | 721 | return ret; |
722 | } | 722 | } |
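/*
 * A minimal sketch of how a get_block helper like the one above is
 * driven (hypothetical caller, for illustration only; the real entry
 * points are ext4_get_block() below and the direct-IO callbacks):
 *
 *	struct buffer_head bh = { .b_size = inode->i_sb->s_blocksize };
 *
 *	err = _ext4_get_block(inode, iblock, &bh, EXT4_GET_BLOCKS_CREATE);
 *	if (!err && buffer_mapped(&bh))
 *		... bh.b_blocknr now holds the physical block ...
 */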
723 | 723 | ||
724 | int ext4_get_block(struct inode *inode, sector_t iblock, | 724 | int ext4_get_block(struct inode *inode, sector_t iblock, |
725 | struct buffer_head *bh, int create) | 725 | struct buffer_head *bh, int create) |
726 | { | 726 | { |
727 | return _ext4_get_block(inode, iblock, bh, | 727 | return _ext4_get_block(inode, iblock, bh, |
728 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 728 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
729 | } | 729 | } |
730 | 730 | ||
731 | /* | 731 | /* |
732 | * `handle' can be NULL if create is zero | 732 | * `handle' can be NULL if create is zero |
733 | */ | 733 | */ |
734 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 734 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
735 | ext4_lblk_t block, int create) | 735 | ext4_lblk_t block, int create) |
736 | { | 736 | { |
737 | struct ext4_map_blocks map; | 737 | struct ext4_map_blocks map; |
738 | struct buffer_head *bh; | 738 | struct buffer_head *bh; |
739 | int err; | 739 | int err; |
740 | 740 | ||
741 | J_ASSERT(handle != NULL || create == 0); | 741 | J_ASSERT(handle != NULL || create == 0); |
742 | 742 | ||
743 | map.m_lblk = block; | 743 | map.m_lblk = block; |
744 | map.m_len = 1; | 744 | map.m_len = 1; |
745 | err = ext4_map_blocks(handle, inode, &map, | 745 | err = ext4_map_blocks(handle, inode, &map, |
746 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 746 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
747 | 747 | ||
748 | if (err == 0) | 748 | if (err == 0) |
749 | return create ? ERR_PTR(-ENOSPC) : NULL; | 749 | return create ? ERR_PTR(-ENOSPC) : NULL; |
750 | if (err < 0) | 750 | if (err < 0) |
751 | return ERR_PTR(err); | 751 | return ERR_PTR(err); |
752 | 752 | ||
753 | bh = sb_getblk(inode->i_sb, map.m_pblk); | 753 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
754 | if (unlikely(!bh)) | 754 | if (unlikely(!bh)) |
755 | return ERR_PTR(-ENOMEM); | 755 | return ERR_PTR(-ENOMEM); |
756 | if (map.m_flags & EXT4_MAP_NEW) { | 756 | if (map.m_flags & EXT4_MAP_NEW) { |
757 | J_ASSERT(create != 0); | 757 | J_ASSERT(create != 0); |
758 | J_ASSERT(handle != NULL); | 758 | J_ASSERT(handle != NULL); |
759 | 759 | ||
760 | /* | 760 | /* |
761 | * Now that we do not always journal data, we should | 761 | * Now that we do not always journal data, we should |
762 | * keep in mind whether this should always journal the | 762 | * keep in mind whether this should always journal the |
763 | * new buffer as metadata. For now, regular file | 763 | * new buffer as metadata. For now, regular file |
764 | * writes use ext4_get_block instead, so it's not a | 764 | * writes use ext4_get_block instead, so it's not a |
765 | * problem. | 765 | * problem. |
766 | */ | 766 | */ |
767 | lock_buffer(bh); | 767 | lock_buffer(bh); |
768 | BUFFER_TRACE(bh, "call get_create_access"); | 768 | BUFFER_TRACE(bh, "call get_create_access"); |
769 | err = ext4_journal_get_create_access(handle, bh); | 769 | err = ext4_journal_get_create_access(handle, bh); |
770 | if (unlikely(err)) { | 770 | if (unlikely(err)) { |
771 | unlock_buffer(bh); | 771 | unlock_buffer(bh); |
772 | goto errout; | 772 | goto errout; |
773 | } | 773 | } |
774 | if (!buffer_uptodate(bh)) { | 774 | if (!buffer_uptodate(bh)) { |
775 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); | 775 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
776 | set_buffer_uptodate(bh); | 776 | set_buffer_uptodate(bh); |
777 | } | 777 | } |
778 | unlock_buffer(bh); | 778 | unlock_buffer(bh); |
779 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 779 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
780 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 780 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
781 | if (unlikely(err)) | 781 | if (unlikely(err)) |
782 | goto errout; | 782 | goto errout; |
783 | } else | 783 | } else |
784 | BUFFER_TRACE(bh, "not a new buffer"); | 784 | BUFFER_TRACE(bh, "not a new buffer"); |
785 | return bh; | 785 | return bh; |
786 | errout: | 786 | errout: |
787 | brelse(bh); | 787 | brelse(bh); |
788 | return ERR_PTR(err); | 788 | return ERR_PTR(err); |
789 | } | 789 | } |
790 | 790 | ||
791 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 791 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
792 | ext4_lblk_t block, int create) | 792 | ext4_lblk_t block, int create) |
793 | { | 793 | { |
794 | struct buffer_head *bh; | 794 | struct buffer_head *bh; |
795 | 795 | ||
796 | bh = ext4_getblk(handle, inode, block, create); | 796 | bh = ext4_getblk(handle, inode, block, create); |
797 | if (IS_ERR(bh)) | 797 | if (IS_ERR(bh)) |
798 | return bh; | 798 | return bh; |
799 | if (!bh || buffer_uptodate(bh)) | 799 | if (!bh || buffer_uptodate(bh)) |
800 | return bh; | 800 | return bh; |
801 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); | 801 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
802 | wait_on_buffer(bh); | 802 | wait_on_buffer(bh); |
803 | if (buffer_uptodate(bh)) | 803 | if (buffer_uptodate(bh)) |
804 | return bh; | 804 | return bh; |
805 | put_bh(bh); | 805 | put_bh(bh); |
806 | return ERR_PTR(-EIO); | 806 | return ERR_PTR(-EIO); |
807 | } | 807 | } |
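/*
 * Callers must distinguish three outcomes from ext4_getblk() and
 * ext4_bread(): ERR_PTR() on error, NULL when the block is unmapped
 * and create == 0, and a valid buffer_head otherwise. A minimal
 * (hypothetical) caller:
 *
 *	bh = ext4_bread(handle, inode, block, 0);
 *	if (IS_ERR(bh))
 *		return PTR_ERR(bh);
 *	if (!bh)
 *		... treat the block as a hole ...
 */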
808 | 808 | ||
809 | int ext4_walk_page_buffers(handle_t *handle, | 809 | int ext4_walk_page_buffers(handle_t *handle, |
810 | struct buffer_head *head, | 810 | struct buffer_head *head, |
811 | unsigned from, | 811 | unsigned from, |
812 | unsigned to, | 812 | unsigned to, |
813 | int *partial, | 813 | int *partial, |
814 | int (*fn)(handle_t *handle, | 814 | int (*fn)(handle_t *handle, |
815 | struct buffer_head *bh)) | 815 | struct buffer_head *bh)) |
816 | { | 816 | { |
817 | struct buffer_head *bh; | 817 | struct buffer_head *bh; |
818 | unsigned block_start, block_end; | 818 | unsigned block_start, block_end; |
819 | unsigned blocksize = head->b_size; | 819 | unsigned blocksize = head->b_size; |
820 | int err, ret = 0; | 820 | int err, ret = 0; |
821 | struct buffer_head *next; | 821 | struct buffer_head *next; |
822 | 822 | ||
823 | for (bh = head, block_start = 0; | 823 | for (bh = head, block_start = 0; |
824 | ret == 0 && (bh != head || !block_start); | 824 | ret == 0 && (bh != head || !block_start); |
825 | block_start = block_end, bh = next) { | 825 | block_start = block_end, bh = next) { |
826 | next = bh->b_this_page; | 826 | next = bh->b_this_page; |
827 | block_end = block_start + blocksize; | 827 | block_end = block_start + blocksize; |
828 | if (block_end <= from || block_start >= to) { | 828 | if (block_end <= from || block_start >= to) { |
829 | if (partial && !buffer_uptodate(bh)) | 829 | if (partial && !buffer_uptodate(bh)) |
830 | *partial = 1; | 830 | *partial = 1; |
831 | continue; | 831 | continue; |
832 | } | 832 | } |
833 | err = (*fn)(handle, bh); | 833 | err = (*fn)(handle, bh); |
834 | if (!ret) | 834 | if (!ret) |
835 | ret = err; | 835 | ret = err; |
836 | } | 836 | } |
837 | return ret; | 837 | return ret; |
838 | } | 838 | } |
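/*
 * Only buffers overlapping [from, to) are passed to fn; for buffers
 * outside that range, *partial is set if one of them is not uptodate.
 * The journalled write path below is the typical caller, e.g.:
 *
 *	ret = ext4_walk_page_buffers(handle, page_buffers(page),
 *				     from, to, NULL,
 *				     do_journal_get_write_access);
 */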
839 | 839 | ||
840 | /* | 840 | /* |
841 | * To preserve ordering, it is essential that the hole instantiation and | 841 | * To preserve ordering, it is essential that the hole instantiation and |
842 | * the data write be encapsulated in a single transaction. We cannot | 842 | * the data write be encapsulated in a single transaction. We cannot |
843 | * close off a transaction and start a new one between the ext4_get_block() | 843 | * close off a transaction and start a new one between the ext4_get_block() |
844 | * and the commit_write(). So doing the jbd2_journal_start at the start of | 844 | * and the commit_write(). So doing the jbd2_journal_start at the start of |
845 | * prepare_write() is the right place. | 845 | * prepare_write() is the right place. |
846 | * | 846 | * |
847 | * Also, this function can nest inside ext4_writepage(). In that case, we | 847 | * Also, this function can nest inside ext4_writepage(). In that case, we |
848 | * *know* that ext4_writepage() has generated enough buffer credits to do the | 848 | * *know* that ext4_writepage() has generated enough buffer credits to do the |
849 | * whole page. So we won't block on the journal in that case, which is good, | 849 | * whole page. So we won't block on the journal in that case, which is good, |
850 | * because the caller may be PF_MEMALLOC. | 850 | * because the caller may be PF_MEMALLOC. |
851 | * | 851 | * |
852 | * By accident, ext4 can be reentered when a transaction is open via | 852 | * By accident, ext4 can be reentered when a transaction is open via |
853 | * quota file writes. If we were to commit the transaction while thus | 853 | * quota file writes. If we were to commit the transaction while thus |
854 | * reentered, there can be a deadlock - we would be holding a quota | 854 | * reentered, there can be a deadlock - we would be holding a quota |
855 | * lock, and the commit would never complete if another thread had a | 855 | * lock, and the commit would never complete if another thread had a |
856 | * transaction open and was blocking on the quota lock - a ranking | 856 | * transaction open and was blocking on the quota lock - a ranking |
857 | * violation. | 857 | * violation. |
858 | * | 858 | * |
859 | * So what we do is to rely on the fact that jbd2_journal_stop/journal_start | 859 | * So what we do is to rely on the fact that jbd2_journal_stop/journal_start |
860 | * will _not_ run commit under these circumstances because handle->h_ref | 860 | * will _not_ run commit under these circumstances because handle->h_ref |
861 | * is elevated. We'll still have enough credits for the tiny quotafile | 861 | * is elevated. We'll still have enough credits for the tiny quotafile |
862 | * write. | 862 | * write. |
863 | */ | 863 | */ |
864 | int do_journal_get_write_access(handle_t *handle, | 864 | int do_journal_get_write_access(handle_t *handle, |
865 | struct buffer_head *bh) | 865 | struct buffer_head *bh) |
866 | { | 866 | { |
867 | int dirty = buffer_dirty(bh); | 867 | int dirty = buffer_dirty(bh); |
868 | int ret; | 868 | int ret; |
869 | 869 | ||
870 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 870 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
871 | return 0; | 871 | return 0; |
872 | /* | 872 | /* |
873 | * __block_write_begin() could have dirtied some buffers. Clean | 873 | * __block_write_begin() could have dirtied some buffers. Clean |
874 | * the dirty bit as jbd2_journal_get_write_access() could complain | 874 | * the dirty bit as jbd2_journal_get_write_access() could complain |
875 | * otherwise about fs integrity issues. Setting of the dirty bit | 875 | * otherwise about fs integrity issues. Setting of the dirty bit |
876 | * by __block_write_begin() isn't a real problem here as we clear | 876 | * by __block_write_begin() isn't a real problem here as we clear |
877 | * the bit before releasing a page lock and thus writeback cannot | 877 | * the bit before releasing a page lock and thus writeback cannot |
878 | * ever write the buffer. | 878 | * ever write the buffer. |
879 | */ | 879 | */ |
880 | if (dirty) | 880 | if (dirty) |
881 | clear_buffer_dirty(bh); | 881 | clear_buffer_dirty(bh); |
882 | BUFFER_TRACE(bh, "get write access"); | 882 | BUFFER_TRACE(bh, "get write access"); |
883 | ret = ext4_journal_get_write_access(handle, bh); | 883 | ret = ext4_journal_get_write_access(handle, bh); |
884 | if (!ret && dirty) | 884 | if (!ret && dirty) |
885 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | 885 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); |
886 | return ret; | 886 | return ret; |
887 | } | 887 | } |
888 | 888 | ||
889 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | 889 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, |
890 | struct buffer_head *bh_result, int create); | 890 | struct buffer_head *bh_result, int create); |
891 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 891 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
892 | loff_t pos, unsigned len, unsigned flags, | 892 | loff_t pos, unsigned len, unsigned flags, |
893 | struct page **pagep, void **fsdata) | 893 | struct page **pagep, void **fsdata) |
894 | { | 894 | { |
895 | struct inode *inode = mapping->host; | 895 | struct inode *inode = mapping->host; |
896 | int ret, needed_blocks; | 896 | int ret, needed_blocks; |
897 | handle_t *handle; | 897 | handle_t *handle; |
898 | int retries = 0; | 898 | int retries = 0; |
899 | struct page *page; | 899 | struct page *page; |
900 | pgoff_t index; | 900 | pgoff_t index; |
901 | unsigned from, to; | 901 | unsigned from, to; |
902 | 902 | ||
903 | trace_ext4_write_begin(inode, pos, len, flags); | 903 | trace_ext4_write_begin(inode, pos, len, flags); |
904 | /* | 904 | /* |
905 | * Reserve one more block for addition to the orphan list in case | 905 | * Reserve one more block for addition to the orphan list in case |
906 | * we allocate blocks but the write fails for some reason | 906 | * we allocate blocks but the write fails for some reason |
907 | */ | 907 | */ |
908 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; | 908 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; |
909 | index = pos >> PAGE_CACHE_SHIFT; | 909 | index = pos >> PAGE_CACHE_SHIFT; |
910 | from = pos & (PAGE_CACHE_SIZE - 1); | 910 | from = pos & (PAGE_CACHE_SIZE - 1); |
911 | to = from + len; | 911 | to = from + len; |
912 | 912 | ||
913 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | 913 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { |
914 | ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, | 914 | ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, |
915 | flags, pagep); | 915 | flags, pagep); |
916 | if (ret < 0) | 916 | if (ret < 0) |
917 | return ret; | 917 | return ret; |
918 | if (ret == 1) | 918 | if (ret == 1) |
919 | return 0; | 919 | return 0; |
920 | } | 920 | } |
921 | 921 | ||
922 | /* | 922 | /* |
923 | * grab_cache_page_write_begin() can take a long time if the | 923 | * grab_cache_page_write_begin() can take a long time if the |
924 | * system is thrashing due to memory pressure, or if the page | 924 | * system is thrashing due to memory pressure, or if the page |
925 | * is being written back. So grab it first before we start | 925 | * is being written back. So grab it first before we start |
926 | * the transaction handle. This also allows us to allocate | 926 | * the transaction handle. This also allows us to allocate |
927 | * the page (if needed) without using GFP_NOFS. | 927 | * the page (if needed) without using GFP_NOFS. |
928 | */ | 928 | */ |
929 | retry_grab: | 929 | retry_grab: |
930 | page = grab_cache_page_write_begin(mapping, index, flags); | 930 | page = grab_cache_page_write_begin(mapping, index, flags); |
931 | if (!page) | 931 | if (!page) |
932 | return -ENOMEM; | 932 | return -ENOMEM; |
933 | unlock_page(page); | 933 | unlock_page(page); |
934 | 934 | ||
935 | retry_journal: | 935 | retry_journal: |
936 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); | 936 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); |
937 | if (IS_ERR(handle)) { | 937 | if (IS_ERR(handle)) { |
938 | page_cache_release(page); | 938 | page_cache_release(page); |
939 | return PTR_ERR(handle); | 939 | return PTR_ERR(handle); |
940 | } | 940 | } |
941 | 941 | ||
942 | lock_page(page); | 942 | lock_page(page); |
943 | if (page->mapping != mapping) { | 943 | if (page->mapping != mapping) { |
944 | /* The page got truncated from under us */ | 944 | /* The page got truncated from under us */ |
945 | unlock_page(page); | 945 | unlock_page(page); |
946 | page_cache_release(page); | 946 | page_cache_release(page); |
947 | ext4_journal_stop(handle); | 947 | ext4_journal_stop(handle); |
948 | goto retry_grab; | 948 | goto retry_grab; |
949 | } | 949 | } |
950 | /* In case writeback began while the page was unlocked */ | 950 | /* In case writeback began while the page was unlocked */ |
951 | wait_for_stable_page(page); | 951 | wait_for_stable_page(page); |
952 | 952 | ||
953 | if (ext4_should_dioread_nolock(inode)) | 953 | if (ext4_should_dioread_nolock(inode)) |
954 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); | 954 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
955 | else | 955 | else |
956 | ret = __block_write_begin(page, pos, len, ext4_get_block); | 956 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
957 | 957 | ||
958 | if (!ret && ext4_should_journal_data(inode)) { | 958 | if (!ret && ext4_should_journal_data(inode)) { |
959 | ret = ext4_walk_page_buffers(handle, page_buffers(page), | 959 | ret = ext4_walk_page_buffers(handle, page_buffers(page), |
960 | from, to, NULL, | 960 | from, to, NULL, |
961 | do_journal_get_write_access); | 961 | do_journal_get_write_access); |
962 | } | 962 | } |
963 | 963 | ||
964 | if (ret) { | 964 | if (ret) { |
965 | unlock_page(page); | 965 | unlock_page(page); |
966 | /* | 966 | /* |
967 | * __block_write_begin may have instantiated a few blocks | 967 | * __block_write_begin may have instantiated a few blocks |
968 | * outside i_size. Trim these off again. We don't need | 968 | * outside i_size. Trim these off again. We don't need |
969 | * i_size_read because we hold i_mutex. | 969 | * i_size_read because we hold i_mutex. |
970 | * | 970 | * |
971 | * Add inode to orphan list in case we crash before | 971 | * Add inode to orphan list in case we crash before |
972 | * truncate finishes | 972 | * truncate finishes |
973 | */ | 973 | */ |
974 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 974 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
975 | ext4_orphan_add(handle, inode); | 975 | ext4_orphan_add(handle, inode); |
976 | 976 | ||
977 | ext4_journal_stop(handle); | 977 | ext4_journal_stop(handle); |
978 | if (pos + len > inode->i_size) { | 978 | if (pos + len > inode->i_size) { |
979 | ext4_truncate_failed_write(inode); | 979 | ext4_truncate_failed_write(inode); |
980 | /* | 980 | /* |
981 | * If truncate failed early the inode might | 981 | * If truncate failed early the inode might |
982 | * still be on the orphan list; we need to | 982 | * still be on the orphan list; we need to |
983 | * make sure the inode is removed from the | 983 | * make sure the inode is removed from the |
984 | * orphan list in that case. | 984 | * orphan list in that case. |
985 | */ | 985 | */ |
986 | if (inode->i_nlink) | 986 | if (inode->i_nlink) |
987 | ext4_orphan_del(NULL, inode); | 987 | ext4_orphan_del(NULL, inode); |
988 | } | 988 | } |
989 | 989 | ||
990 | if (ret == -ENOSPC && | 990 | if (ret == -ENOSPC && |
991 | ext4_should_retry_alloc(inode->i_sb, &retries)) | 991 | ext4_should_retry_alloc(inode->i_sb, &retries)) |
992 | goto retry_journal; | 992 | goto retry_journal; |
993 | page_cache_release(page); | 993 | page_cache_release(page); |
994 | return ret; | 994 | return ret; |
995 | } | 995 | } |
996 | *pagep = page; | 996 | *pagep = page; |
997 | return ret; | 997 | return ret; |
998 | } | 998 | } |
999 | 999 | ||
1000 | /* For write_end() in data=journal mode */ | 1000 | /* For write_end() in data=journal mode */ |
1001 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) | 1001 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) |
1002 | { | 1002 | { |
1003 | int ret; | 1003 | int ret; |
1004 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1004 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1005 | return 0; | 1005 | return 0; |
1006 | set_buffer_uptodate(bh); | 1006 | set_buffer_uptodate(bh); |
1007 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | 1007 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); |
1008 | clear_buffer_meta(bh); | 1008 | clear_buffer_meta(bh); |
1009 | clear_buffer_prio(bh); | 1009 | clear_buffer_prio(bh); |
1010 | return ret; | 1010 | return ret; |
1011 | } | 1011 | } |
1012 | 1012 | ||
1013 | /* | 1013 | /* |
1014 | * We need to pick up the new inode size which generic_commit_write gave us. | 1014 | * We need to pick up the new inode size which generic_commit_write gave us. |
1015 | * `file' can be NULL - eg, when called from page_symlink(). | 1015 | * `file' can be NULL - eg, when called from page_symlink(). |
1016 | * | 1016 | * |
1017 | * ext4 never places buffers on inode->i_mapping->private_list. Metadata | 1017 | * ext4 never places buffers on inode->i_mapping->private_list. Metadata |
1018 | * buffers are managed internally. | 1018 | * buffers are managed internally. |
1019 | */ | 1019 | */ |
1020 | static int ext4_write_end(struct file *file, | 1020 | static int ext4_write_end(struct file *file, |
1021 | struct address_space *mapping, | 1021 | struct address_space *mapping, |
1022 | loff_t pos, unsigned len, unsigned copied, | 1022 | loff_t pos, unsigned len, unsigned copied, |
1023 | struct page *page, void *fsdata) | 1023 | struct page *page, void *fsdata) |
1024 | { | 1024 | { |
1025 | handle_t *handle = ext4_journal_current_handle(); | 1025 | handle_t *handle = ext4_journal_current_handle(); |
1026 | struct inode *inode = mapping->host; | 1026 | struct inode *inode = mapping->host; |
 | | 1027 | loff_t old_size = inode->i_size; |
1027 | int ret = 0, ret2; | 1028 | int ret = 0, ret2; |
1028 | int i_size_changed = 0; | 1029 | int i_size_changed = 0; |
1029 | 1030 | ||
1030 | trace_ext4_write_end(inode, pos, len, copied); | 1031 | trace_ext4_write_end(inode, pos, len, copied); |
1031 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { | 1032 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { |
1032 | ret = ext4_jbd2_file_inode(handle, inode); | 1033 | ret = ext4_jbd2_file_inode(handle, inode); |
1033 | if (ret) { | 1034 | if (ret) { |
1034 | unlock_page(page); | 1035 | unlock_page(page); |
1035 | page_cache_release(page); | 1036 | page_cache_release(page); |
1036 | goto errout; | 1037 | goto errout; |
1037 | } | 1038 | } |
1038 | } | 1039 | } |
1039 | 1040 | ||
1040 | if (ext4_has_inline_data(inode)) { | 1041 | if (ext4_has_inline_data(inode)) { |
1041 | ret = ext4_write_inline_data_end(inode, pos, len, | 1042 | ret = ext4_write_inline_data_end(inode, pos, len, |
1042 | copied, page); | 1043 | copied, page); |
1043 | if (ret < 0) | 1044 | if (ret < 0) |
1044 | goto errout; | 1045 | goto errout; |
1045 | copied = ret; | 1046 | copied = ret; |
1046 | } else | 1047 | } else |
1047 | copied = block_write_end(file, mapping, pos, | 1048 | copied = block_write_end(file, mapping, pos, |
1048 | len, copied, page, fsdata); | 1049 | len, copied, page, fsdata); |
1049 | /* | 1050 | /* |
1050 | * it's important to update i_size while still holding the page lock: | 1051 | * it's important to update i_size while still holding the page lock: |
1051 | * page writeout could otherwise come in and zero beyond i_size. | 1052 | * page writeout could otherwise come in and zero beyond i_size. |
1052 | */ | 1053 | */ |
1053 | i_size_changed = ext4_update_inode_size(inode, pos + copied); | 1054 | i_size_changed = ext4_update_inode_size(inode, pos + copied); |
1054 | unlock_page(page); | 1055 | unlock_page(page); |
1055 | page_cache_release(page); | 1056 | page_cache_release(page); |
1056 | 1057 | ||
 | | 1058 | if (old_size < pos) |
 | | 1059 | pagecache_isize_extended(inode, old_size, pos); |
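/*
 * On the two lines added above: with blocksize < pagesize, the page
 * straddling the old i_size may already be mapped writable, so a
 * later mmap store into the newly exposed tail would not fault and
 * would bypass ->page_mkwrite. pagecache_isize_extended() cleans that
 * page's mappings so the next write faults properly; this is the
 * nodelalloc mmap data corruption fix this commit is about.
 */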
1057 | /* | 1060 | /* |
1058 | * Don't mark the inode dirty under page lock. First, it unnecessarily | 1061 | * Don't mark the inode dirty under page lock. First, it unnecessarily |
1059 | * makes the holding time of page lock longer. Second, it forces lock | 1062 | * makes the holding time of page lock longer. Second, it forces lock |
1060 | * ordering of page lock and transaction start for journaling | 1063 | * ordering of page lock and transaction start for journaling |
1061 | * filesystems. | 1064 | * filesystems. |
1062 | */ | 1065 | */ |
1063 | if (i_size_changed) | 1066 | if (i_size_changed) |
1064 | ext4_mark_inode_dirty(handle, inode); | 1067 | ext4_mark_inode_dirty(handle, inode); |
1065 | 1068 | ||
1066 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1069 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1067 | /* If we have allocated more blocks than we have | 1070 | /* If we have allocated more blocks than we have |
1068 | * copied in, we will have blocks allocated outside | 1071 | * copied in, we will have blocks allocated outside |
1069 | * inode->i_size, so truncate them. | 1072 | * inode->i_size, so truncate them. |
1070 | */ | 1073 | */ |
1071 | ext4_orphan_add(handle, inode); | 1074 | ext4_orphan_add(handle, inode); |
1072 | errout: | 1075 | errout: |
1073 | ret2 = ext4_journal_stop(handle); | 1076 | ret2 = ext4_journal_stop(handle); |
1074 | if (!ret) | 1077 | if (!ret) |
1075 | ret = ret2; | 1078 | ret = ret2; |
1076 | 1079 | ||
1077 | if (pos + len > inode->i_size) { | 1080 | if (pos + len > inode->i_size) { |
1078 | ext4_truncate_failed_write(inode); | 1081 | ext4_truncate_failed_write(inode); |
1079 | /* | 1082 | /* |
1080 | * If truncate failed early the inode might still be | 1083 | * If truncate failed early the inode might still be |
1081 | * on the orphan list; we need to make sure the inode | 1084 | * on the orphan list; we need to make sure the inode |
1082 | * is removed from the orphan list in that case. | 1085 | * is removed from the orphan list in that case. |
1083 | */ | 1086 | */ |
1084 | if (inode->i_nlink) | 1087 | if (inode->i_nlink) |
1085 | ext4_orphan_del(NULL, inode); | 1088 | ext4_orphan_del(NULL, inode); |
1086 | } | 1089 | } |
1087 | 1090 | ||
1088 | return ret ? ret : copied; | 1091 | return ret ? ret : copied; |
1089 | } | 1092 | } |
1090 | 1093 | ||
1091 | static int ext4_journalled_write_end(struct file *file, | 1094 | static int ext4_journalled_write_end(struct file *file, |
1092 | struct address_space *mapping, | 1095 | struct address_space *mapping, |
1093 | loff_t pos, unsigned len, unsigned copied, | 1096 | loff_t pos, unsigned len, unsigned copied, |
1094 | struct page *page, void *fsdata) | 1097 | struct page *page, void *fsdata) |
1095 | { | 1098 | { |
1096 | handle_t *handle = ext4_journal_current_handle(); | 1099 | handle_t *handle = ext4_journal_current_handle(); |
1097 | struct inode *inode = mapping->host; | 1100 | struct inode *inode = mapping->host; |
 | | 1101 | loff_t old_size = inode->i_size; |
1098 | int ret = 0, ret2; | 1102 | int ret = 0, ret2; |
1099 | int partial = 0; | 1103 | int partial = 0; |
1100 | unsigned from, to; | 1104 | unsigned from, to; |
1101 | int size_changed = 0; | 1105 | int size_changed = 0; |
1102 | 1106 | ||
1103 | trace_ext4_journalled_write_end(inode, pos, len, copied); | 1107 | trace_ext4_journalled_write_end(inode, pos, len, copied); |
1104 | from = pos & (PAGE_CACHE_SIZE - 1); | 1108 | from = pos & (PAGE_CACHE_SIZE - 1); |
1105 | to = from + len; | 1109 | to = from + len; |
1106 | 1110 | ||
1107 | BUG_ON(!ext4_handle_valid(handle)); | 1111 | BUG_ON(!ext4_handle_valid(handle)); |
1108 | 1112 | ||
1109 | if (ext4_has_inline_data(inode)) | 1113 | if (ext4_has_inline_data(inode)) |
1110 | copied = ext4_write_inline_data_end(inode, pos, len, | 1114 | copied = ext4_write_inline_data_end(inode, pos, len, |
1111 | copied, page); | 1115 | copied, page); |
1112 | else { | 1116 | else { |
1113 | if (copied < len) { | 1117 | if (copied < len) { |
1114 | if (!PageUptodate(page)) | 1118 | if (!PageUptodate(page)) |
1115 | copied = 0; | 1119 | copied = 0; |
1116 | page_zero_new_buffers(page, from+copied, to); | 1120 | page_zero_new_buffers(page, from+copied, to); |
1117 | } | 1121 | } |
1118 | 1122 | ||
1119 | ret = ext4_walk_page_buffers(handle, page_buffers(page), from, | 1123 | ret = ext4_walk_page_buffers(handle, page_buffers(page), from, |
1120 | to, &partial, write_end_fn); | 1124 | to, &partial, write_end_fn); |
1121 | if (!partial) | 1125 | if (!partial) |
1122 | SetPageUptodate(page); | 1126 | SetPageUptodate(page); |
1123 | } | 1127 | } |
1124 | size_changed = ext4_update_inode_size(inode, pos + copied); | 1128 | size_changed = ext4_update_inode_size(inode, pos + copied); |
1125 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 1129 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1126 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | 1130 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; |
1127 | unlock_page(page); | 1131 | unlock_page(page); |
1128 | page_cache_release(page); | 1132 | page_cache_release(page); |
 | | 1133 | |
 | | 1134 | if (old_size < pos) |
 | | 1135 | pagecache_isize_extended(inode, old_size, pos); |
1129 | 1136 | ||
1130 | if (size_changed) { | 1137 | if (size_changed) { |
1131 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1138 | ret2 = ext4_mark_inode_dirty(handle, inode); |
1132 | if (!ret) | 1139 | if (!ret) |
1133 | ret = ret2; | 1140 | ret = ret2; |
1134 | } | 1141 | } |
1135 | 1142 | ||
1136 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1143 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1137 | /* If we have allocated more blocks than we have | 1144 | /* If we have allocated more blocks than we have |
1138 | * copied in, we will have blocks allocated outside | 1145 | * copied in, we will have blocks allocated outside |
1139 | * inode->i_size, so truncate them. | 1146 | * inode->i_size, so truncate them. |
1140 | */ | 1147 | */ |
1141 | ext4_orphan_add(handle, inode); | 1148 | ext4_orphan_add(handle, inode); |
1142 | 1149 | ||
1143 | ret2 = ext4_journal_stop(handle); | 1150 | ret2 = ext4_journal_stop(handle); |
1144 | if (!ret) | 1151 | if (!ret) |
1145 | ret = ret2; | 1152 | ret = ret2; |
1146 | if (pos + len > inode->i_size) { | 1153 | if (pos + len > inode->i_size) { |
1147 | ext4_truncate_failed_write(inode); | 1154 | ext4_truncate_failed_write(inode); |
1148 | /* | 1155 | /* |
1149 | * If truncate failed early the inode might still be | 1156 | * If truncate failed early the inode might still be |
1150 | * on the orphan list; we need to make sure the inode | 1157 | * on the orphan list; we need to make sure the inode |
1151 | * is removed from the orphan list in that case. | 1158 | * is removed from the orphan list in that case. |
1152 | */ | 1159 | */ |
1153 | if (inode->i_nlink) | 1160 | if (inode->i_nlink) |
1154 | ext4_orphan_del(NULL, inode); | 1161 | ext4_orphan_del(NULL, inode); |
1155 | } | 1162 | } |
1156 | 1163 | ||
1157 | return ret ? ret : copied; | 1164 | return ret ? ret : copied; |
1158 | } | 1165 | } |
1159 | 1166 | ||
1160 | /* | 1167 | /* |
1161 | * Reserve a single cluster located at lblock | 1168 | * Reserve a single cluster located at lblock |
1162 | */ | 1169 | */ |
1163 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1170 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1164 | { | 1171 | { |
1165 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1172 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1166 | struct ext4_inode_info *ei = EXT4_I(inode); | 1173 | struct ext4_inode_info *ei = EXT4_I(inode); |
1167 | unsigned int md_needed; | 1174 | unsigned int md_needed; |
1168 | int ret; | 1175 | int ret; |
1169 | 1176 | ||
1170 | /* | 1177 | /* |
1171 | * We will charge metadata quota at writeout time; this saves | 1178 | * We will charge metadata quota at writeout time; this saves |
1172 | * us from metadata over-estimation, though we may go over by | 1179 | * us from metadata over-estimation, though we may go over by |
1173 | * a small amount in the end. Here we just reserve for data. | 1180 | * a small amount in the end. Here we just reserve for data. |
1174 | */ | 1181 | */ |
1175 | ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); | 1182 | ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); |
1176 | if (ret) | 1183 | if (ret) |
1177 | return ret; | 1184 | return ret; |
1178 | 1185 | ||
1179 | /* | 1186 | /* |
1180 | * Recalculate the number of metadata blocks to reserve | 1187 | * Recalculate the number of metadata blocks to reserve |
1181 | * in order to allocate nrblocks; the | 1188 | * in order to allocate nrblocks; the |
1182 | * worst case is one extent per block. | 1189 | * worst case is one extent per block. |
1183 | */ | 1190 | */ |
1184 | spin_lock(&ei->i_block_reservation_lock); | 1191 | spin_lock(&ei->i_block_reservation_lock); |
1185 | /* | 1192 | /* |
1186 | * ext4_calc_metadata_amount() has side effects, which we have | 1193 | * ext4_calc_metadata_amount() has side effects, which we have |
1188 | * to be prepared to undo if we fail to claim space. | 1195 | * to be prepared to undo if we fail to claim space. |
1188 | */ | 1195 | */ |
1189 | md_needed = 0; | 1196 | md_needed = 0; |
1190 | trace_ext4_da_reserve_space(inode, 0); | 1197 | trace_ext4_da_reserve_space(inode, 0); |
1191 | 1198 | ||
1192 | if (ext4_claim_free_clusters(sbi, 1, 0)) { | 1199 | if (ext4_claim_free_clusters(sbi, 1, 0)) { |
1193 | spin_unlock(&ei->i_block_reservation_lock); | 1200 | spin_unlock(&ei->i_block_reservation_lock); |
1194 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); | 1201 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1195 | return -ENOSPC; | 1202 | return -ENOSPC; |
1196 | } | 1203 | } |
1197 | ei->i_reserved_data_blocks++; | 1204 | ei->i_reserved_data_blocks++; |
1198 | spin_unlock(&ei->i_block_reservation_lock); | 1205 | spin_unlock(&ei->i_block_reservation_lock); |
1199 | 1206 | ||
1200 | return 0; /* success */ | 1207 | return 0; /* success */ |
1201 | } | 1208 | } |
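/*
 * Note the units above: the reservation is taken in clusters (a
 * single cluster here), while quota is charged in blocks, which is
 * why dquot_reserve_block() and the release path go through the
 * EXT4_C2B() conversion.
 */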
1202 | 1209 | ||
1203 | static void ext4_da_release_space(struct inode *inode, int to_free) | 1210 | static void ext4_da_release_space(struct inode *inode, int to_free) |
1204 | { | 1211 | { |
1205 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1212 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1206 | struct ext4_inode_info *ei = EXT4_I(inode); | 1213 | struct ext4_inode_info *ei = EXT4_I(inode); |
1207 | 1214 | ||
1208 | if (!to_free) | 1215 | if (!to_free) |
1209 | return; /* Nothing to release, exit */ | 1216 | return; /* Nothing to release, exit */ |
1210 | 1217 | ||
1211 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1218 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1212 | 1219 | ||
1213 | trace_ext4_da_release_space(inode, to_free); | 1220 | trace_ext4_da_release_space(inode, to_free); |
1214 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { | 1221 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
1215 | /* | 1222 | /* |
1216 | * if there aren't enough reserved blocks, then the | 1223 | * if there aren't enough reserved blocks, then the |
1217 | * counter is messed up somewhere. Since this | 1224 | * counter is messed up somewhere. Since this |
1218 | * function is called from invalidatepage, it's | 1225 | * function is called from invalidatepage, it's |
1219 | * harmless to return without any action. | 1226 | * harmless to return without any action. |
1220 | */ | 1227 | */ |
1221 | ext4_warning(inode->i_sb, "ext4_da_release_space: " | 1228 | ext4_warning(inode->i_sb, "ext4_da_release_space: " |
1222 | "ino %lu, to_free %d with only %d reserved " | 1229 | "ino %lu, to_free %d with only %d reserved " |
1223 | "data blocks", inode->i_ino, to_free, | 1230 | "data blocks", inode->i_ino, to_free, |
1224 | ei->i_reserved_data_blocks); | 1231 | ei->i_reserved_data_blocks); |
1225 | WARN_ON(1); | 1232 | WARN_ON(1); |
1226 | to_free = ei->i_reserved_data_blocks; | 1233 | to_free = ei->i_reserved_data_blocks; |
1227 | } | 1234 | } |
1228 | ei->i_reserved_data_blocks -= to_free; | 1235 | ei->i_reserved_data_blocks -= to_free; |
1229 | 1236 | ||
1230 | /* update fs dirty data blocks counter */ | 1237 | /* update fs dirty data blocks counter */ |
1231 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); | 1238 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); |
1232 | 1239 | ||
1233 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1240 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1234 | 1241 | ||
1235 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); | 1242 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); |
1236 | } | 1243 | } |
1237 | 1244 | ||
1238 | static void ext4_da_page_release_reservation(struct page *page, | 1245 | static void ext4_da_page_release_reservation(struct page *page, |
1239 | unsigned int offset, | 1246 | unsigned int offset, |
1240 | unsigned int length) | 1247 | unsigned int length) |
1241 | { | 1248 | { |
1242 | int to_release = 0; | 1249 | int to_release = 0; |
1243 | struct buffer_head *head, *bh; | 1250 | struct buffer_head *head, *bh; |
1244 | unsigned int curr_off = 0; | 1251 | unsigned int curr_off = 0; |
1245 | struct inode *inode = page->mapping->host; | 1252 | struct inode *inode = page->mapping->host; |
1246 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1253 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1247 | unsigned int stop = offset + length; | 1254 | unsigned int stop = offset + length; |
1248 | int num_clusters; | 1255 | int num_clusters; |
1249 | ext4_fsblk_t lblk; | 1256 | ext4_fsblk_t lblk; |
1250 | 1257 | ||
1251 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | 1258 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); |
1252 | 1259 | ||
1253 | head = page_buffers(page); | 1260 | head = page_buffers(page); |
1254 | bh = head; | 1261 | bh = head; |
1255 | do { | 1262 | do { |
1256 | unsigned int next_off = curr_off + bh->b_size; | 1263 | unsigned int next_off = curr_off + bh->b_size; |
1257 | 1264 | ||
1258 | if (next_off > stop) | 1265 | if (next_off > stop) |
1259 | break; | 1266 | break; |
1260 | 1267 | ||
1261 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1268 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1262 | to_release++; | 1269 | to_release++; |
1263 | clear_buffer_delay(bh); | 1270 | clear_buffer_delay(bh); |
1264 | } | 1271 | } |
1265 | curr_off = next_off; | 1272 | curr_off = next_off; |
1266 | } while ((bh = bh->b_this_page) != head); | 1273 | } while ((bh = bh->b_this_page) != head); |
1267 | 1274 | ||
1268 | if (to_release) { | 1275 | if (to_release) { |
1269 | lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1276 | lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1270 | ext4_es_remove_extent(inode, lblk, to_release); | 1277 | ext4_es_remove_extent(inode, lblk, to_release); |
1271 | } | 1278 | } |
1272 | 1279 | ||
1273 | /* If we have released all the blocks belonging to a cluster, then we | 1280 | /* If we have released all the blocks belonging to a cluster, then we |
1274 | * need to release the reserved space for that cluster. */ | 1281 | * need to release the reserved space for that cluster. */ |
1275 | num_clusters = EXT4_NUM_B2C(sbi, to_release); | 1282 | num_clusters = EXT4_NUM_B2C(sbi, to_release); |
1276 | while (num_clusters > 0) { | 1283 | while (num_clusters > 0) { |
1277 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + | 1284 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + |
1278 | ((num_clusters - 1) << sbi->s_cluster_bits); | 1285 | ((num_clusters - 1) << sbi->s_cluster_bits); |
1279 | if (sbi->s_cluster_ratio == 1 || | 1286 | if (sbi->s_cluster_ratio == 1 || |
1280 | !ext4_find_delalloc_cluster(inode, lblk)) | 1287 | !ext4_find_delalloc_cluster(inode, lblk)) |
1281 | ext4_da_release_space(inode, 1); | 1288 | ext4_da_release_space(inode, 1); |
1282 | 1289 | ||
1283 | num_clusters--; | 1290 | num_clusters--; |
1284 | } | 1291 | } |
1285 | } | 1292 | } |
1286 | 1293 | ||
1287 | /* | 1294 | /* |
1288 | * Delayed allocation stuff | 1295 | * Delayed allocation stuff |
1289 | */ | 1296 | */ |
1290 | 1297 | ||
1291 | struct mpage_da_data { | 1298 | struct mpage_da_data { |
1292 | struct inode *inode; | 1299 | struct inode *inode; |
1293 | struct writeback_control *wbc; | 1300 | struct writeback_control *wbc; |
1294 | 1301 | ||
1295 | pgoff_t first_page; /* The first page to write */ | 1302 | pgoff_t first_page; /* The first page to write */ |
1296 | pgoff_t next_page; /* Current page to examine */ | 1303 | pgoff_t next_page; /* Current page to examine */ |
1297 | pgoff_t last_page; /* Last page to examine */ | 1304 | pgoff_t last_page; /* Last page to examine */ |
1298 | /* | 1305 | /* |
1299 | * Extent to map - this can be after first_page because that can be | 1306 | * Extent to map - this can be after first_page because that can be |
1300 | * fully mapped. We somewhat abuse m_flags to store whether the extent | 1307 | * fully mapped. We somewhat abuse m_flags to store whether the extent |
1301 | * is delalloc or unwritten. | 1308 | * is delalloc or unwritten. |
1302 | */ | 1309 | */ |
1303 | struct ext4_map_blocks map; | 1310 | struct ext4_map_blocks map; |
1304 | struct ext4_io_submit io_submit; /* IO submission data */ | 1311 | struct ext4_io_submit io_submit; /* IO submission data */ |
1305 | }; | 1312 | }; |
1306 | 1313 | ||
1307 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, | 1314 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, |
1308 | bool invalidate) | 1315 | bool invalidate) |
1309 | { | 1316 | { |
1310 | int nr_pages, i; | 1317 | int nr_pages, i; |
1311 | pgoff_t index, end; | 1318 | pgoff_t index, end; |
1312 | struct pagevec pvec; | 1319 | struct pagevec pvec; |
1313 | struct inode *inode = mpd->inode; | 1320 | struct inode *inode = mpd->inode; |
1314 | struct address_space *mapping = inode->i_mapping; | 1321 | struct address_space *mapping = inode->i_mapping; |
1315 | 1322 | ||
1316 | /* This is necessary when next_page == 0. */ | 1323 | /* This is necessary when next_page == 0. */ |
1317 | if (mpd->first_page >= mpd->next_page) | 1324 | if (mpd->first_page >= mpd->next_page) |
1318 | return; | 1325 | return; |
1319 | 1326 | ||
1320 | index = mpd->first_page; | 1327 | index = mpd->first_page; |
1321 | end = mpd->next_page - 1; | 1328 | end = mpd->next_page - 1; |
1322 | if (invalidate) { | 1329 | if (invalidate) { |
1323 | ext4_lblk_t start, last; | 1330 | ext4_lblk_t start, last; |
1324 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1331 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1325 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1332 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
1326 | ext4_es_remove_extent(inode, start, last - start + 1); | 1333 | ext4_es_remove_extent(inode, start, last - start + 1); |
1327 | } | 1334 | } |
1328 | 1335 | ||
1329 | pagevec_init(&pvec, 0); | 1336 | pagevec_init(&pvec, 0); |
1330 | while (index <= end) { | 1337 | while (index <= end) { |
1331 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 1338 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
1332 | if (nr_pages == 0) | 1339 | if (nr_pages == 0) |
1333 | break; | 1340 | break; |
1334 | for (i = 0; i < nr_pages; i++) { | 1341 | for (i = 0; i < nr_pages; i++) { |
1335 | struct page *page = pvec.pages[i]; | 1342 | struct page *page = pvec.pages[i]; |
1336 | if (page->index > end) | 1343 | if (page->index > end) |
1337 | break; | 1344 | break; |
1338 | BUG_ON(!PageLocked(page)); | 1345 | BUG_ON(!PageLocked(page)); |
1339 | BUG_ON(PageWriteback(page)); | 1346 | BUG_ON(PageWriteback(page)); |
1340 | if (invalidate) { | 1347 | if (invalidate) { |
1341 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); | 1348 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
1342 | ClearPageUptodate(page); | 1349 | ClearPageUptodate(page); |
1343 | } | 1350 | } |
1344 | unlock_page(page); | 1351 | unlock_page(page); |
1345 | } | 1352 | } |
1346 | index = pvec.pages[nr_pages - 1]->index + 1; | 1353 | index = pvec.pages[nr_pages - 1]->index + 1; |
1347 | pagevec_release(&pvec); | 1354 | pagevec_release(&pvec); |
1348 | } | 1355 | } |
1349 | } | 1356 | } |
1350 | 1357 | ||
1351 | static void ext4_print_free_blocks(struct inode *inode) | 1358 | static void ext4_print_free_blocks(struct inode *inode) |
1352 | { | 1359 | { |
1353 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1360 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1354 | struct super_block *sb = inode->i_sb; | 1361 | struct super_block *sb = inode->i_sb; |
1355 | struct ext4_inode_info *ei = EXT4_I(inode); | 1362 | struct ext4_inode_info *ei = EXT4_I(inode); |
1356 | 1363 | ||
1357 | ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", | 1364 | ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", |
1358 | EXT4_C2B(EXT4_SB(inode->i_sb), | 1365 | EXT4_C2B(EXT4_SB(inode->i_sb), |
1359 | ext4_count_free_clusters(sb))); | 1366 | ext4_count_free_clusters(sb))); |
1360 | ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); | 1367 | ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); |
1361 | ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", | 1368 | ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", |
1362 | (long long) EXT4_C2B(EXT4_SB(sb), | 1369 | (long long) EXT4_C2B(EXT4_SB(sb), |
1363 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | 1370 | percpu_counter_sum(&sbi->s_freeclusters_counter))); |
1364 | ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", | 1371 | ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", |
1365 | (long long) EXT4_C2B(EXT4_SB(sb), | 1372 | (long long) EXT4_C2B(EXT4_SB(sb), |
1366 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | 1373 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); |
1367 | ext4_msg(sb, KERN_CRIT, "Block reservation details"); | 1374 | ext4_msg(sb, KERN_CRIT, "Block reservation details"); |
1368 | ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", | 1375 | ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", |
1369 | ei->i_reserved_data_blocks); | 1376 | ei->i_reserved_data_blocks); |
1370 | return; | 1377 | return; |
1371 | } | 1378 | } |
1372 | 1379 | ||
1373 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 1380 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
1374 | { | 1381 | { |
1375 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 1382 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
1376 | } | 1383 | } |
1377 | 1384 | ||
1378 | /* | 1385 | /* |
1379 | * This function grabs code from the very beginning of | 1386 | * This function grabs code from the very beginning of |
1380 | * ext4_map_blocks, but assumes that the caller is on the delayed write | 1387 | * ext4_map_blocks, but assumes that the caller is on the delayed write |
1381 | * path. It looks up the requested blocks and sets the | 1388 | * path. It looks up the requested blocks and sets the |
1382 | * buffer delay bit under the protection of i_data_sem. | 1389 | * buffer delay bit under the protection of i_data_sem. |
1383 | */ | 1390 | */ |
1384 | static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | 1391 | static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, |
1385 | struct ext4_map_blocks *map, | 1392 | struct ext4_map_blocks *map, |
1386 | struct buffer_head *bh) | 1393 | struct buffer_head *bh) |
1387 | { | 1394 | { |
1388 | struct extent_status es; | 1395 | struct extent_status es; |
1389 | int retval; | 1396 | int retval; |
1390 | sector_t invalid_block = ~((sector_t) 0xffff); | 1397 | sector_t invalid_block = ~((sector_t) 0xffff); |
1391 | #ifdef ES_AGGRESSIVE_TEST | 1398 | #ifdef ES_AGGRESSIVE_TEST |
1392 | struct ext4_map_blocks orig_map; | 1399 | struct ext4_map_blocks orig_map; |
1393 | 1400 | ||
1394 | memcpy(&orig_map, map, sizeof(*map)); | 1401 | memcpy(&orig_map, map, sizeof(*map)); |
1395 | #endif | 1402 | #endif |
1396 | 1403 | ||
1397 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | 1404 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) |
1398 | invalid_block = ~0; | 1405 | invalid_block = ~0; |
1399 | 1406 | ||
1400 | map->m_flags = 0; | 1407 | map->m_flags = 0; |
1401 | ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," | 1408 | ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," |
1402 | "logical block %lu\n", inode->i_ino, map->m_len, | 1409 | "logical block %lu\n", inode->i_ino, map->m_len, |
1403 | (unsigned long) map->m_lblk); | 1410 | (unsigned long) map->m_lblk); |
1404 | 1411 | ||
1405 | /* Lookup extent status tree firstly */ | 1412 | /* Lookup extent status tree firstly */ |
1406 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1413 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
1407 | if (ext4_es_is_hole(&es)) { | 1414 | if (ext4_es_is_hole(&es)) { |
1408 | retval = 0; | 1415 | retval = 0; |
1409 | down_read(&EXT4_I(inode)->i_data_sem); | 1416 | down_read(&EXT4_I(inode)->i_data_sem); |
1410 | goto add_delayed; | 1417 | goto add_delayed; |
1411 | } | 1418 | } |
1412 | 1419 | ||
1413 | /* | 1420 | /* |
1414 | * Delayed extent could be allocated by fallocate. | 1421 | * Delayed extent could be allocated by fallocate. |
1415 | * So we need to check it. | 1422 | * So we need to check it. |
1416 | */ | 1423 | */ |
1417 | if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { | 1424 | if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { |
1418 | map_bh(bh, inode->i_sb, invalid_block); | 1425 | map_bh(bh, inode->i_sb, invalid_block); |
1419 | set_buffer_new(bh); | 1426 | set_buffer_new(bh); |
1420 | set_buffer_delay(bh); | 1427 | set_buffer_delay(bh); |
1421 | return 0; | 1428 | return 0; |
1422 | } | 1429 | } |
1423 | 1430 | ||
1424 | map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk; | 1431 | map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk; |
1425 | retval = es.es_len - (iblock - es.es_lblk); | 1432 | retval = es.es_len - (iblock - es.es_lblk); |
1426 | if (retval > map->m_len) | 1433 | if (retval > map->m_len) |
1427 | retval = map->m_len; | 1434 | retval = map->m_len; |
1428 | map->m_len = retval; | 1435 | map->m_len = retval; |
1429 | if (ext4_es_is_written(&es)) | 1436 | if (ext4_es_is_written(&es)) |
1430 | map->m_flags |= EXT4_MAP_MAPPED; | 1437 | map->m_flags |= EXT4_MAP_MAPPED; |
1431 | else if (ext4_es_is_unwritten(&es)) | 1438 | else if (ext4_es_is_unwritten(&es)) |
1432 | map->m_flags |= EXT4_MAP_UNWRITTEN; | 1439 | map->m_flags |= EXT4_MAP_UNWRITTEN; |
1433 | else | 1440 | else |
1434 | BUG_ON(1); | 1441 | BUG_ON(1); |
1435 | 1442 | ||
1436 | #ifdef ES_AGGRESSIVE_TEST | 1443 | #ifdef ES_AGGRESSIVE_TEST |
1437 | ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); | 1444 | ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); |
1438 | #endif | 1445 | #endif |
1439 | return retval; | 1446 | return retval; |
1440 | } | 1447 | } |
1441 | 1448 | ||
1442 | /* | 1449 | /* |
1443 | * Try to see if we can get the block without requesting a new | 1450 | * Try to see if we can get the block without requesting a new |
1444 | * file system block. | 1451 | * file system block. |
1445 | */ | 1452 | */ |
1446 | down_read(&EXT4_I(inode)->i_data_sem); | 1453 | down_read(&EXT4_I(inode)->i_data_sem); |
1447 | if (ext4_has_inline_data(inode)) | 1454 | if (ext4_has_inline_data(inode)) |
1448 | retval = 0; | 1455 | retval = 0; |
1449 | else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1456 | else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1450 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); | 1457 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); |
1451 | else | 1458 | else |
1452 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); | 1459 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); |
1453 | 1460 | ||
1454 | add_delayed: | 1461 | add_delayed: |
1455 | if (retval == 0) { | 1462 | if (retval == 0) { |
1456 | int ret; | 1463 | int ret; |
1457 | /* | 1464 | /* |
1458 | * XXX: __block_prepare_write() unmaps the passed block; | 1465 | * XXX: __block_prepare_write() unmaps the passed block; |
1459 | * is that OK? | 1466 | * is that OK? |
1460 | */ | 1467 | */ |
1461 | /* | 1468 | /* |
1462 | * If the block was allocated from a previously allocated cluster, | 1469 | * If the block was allocated from a previously allocated cluster, |
1463 | * then we don't need to reserve it again. However we still need | 1470 | * then we don't need to reserve it again. However we still need |
1464 | * to reserve metadata for every block we're going to write. | 1471 | * to reserve metadata for every block we're going to write. |
1465 | */ | 1472 | */ |
1466 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 || | 1473 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 || |
1467 | !ext4_find_delalloc_cluster(inode, map->m_lblk)) { | 1474 | !ext4_find_delalloc_cluster(inode, map->m_lblk)) { |
1468 | ret = ext4_da_reserve_space(inode, iblock); | 1475 | ret = ext4_da_reserve_space(inode, iblock); |
1469 | if (ret) { | 1476 | if (ret) { |
1470 | /* not enough space to reserve */ | 1477 | /* not enough space to reserve */ |
1471 | retval = ret; | 1478 | retval = ret; |
1472 | goto out_unlock; | 1479 | goto out_unlock; |
1473 | } | 1480 | } |
1474 | } | 1481 | } |
1475 | 1482 | ||
1476 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, | 1483 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
1477 | ~0, EXTENT_STATUS_DELAYED); | 1484 | ~0, EXTENT_STATUS_DELAYED); |
1478 | if (ret) { | 1485 | if (ret) { |
1479 | retval = ret; | 1486 | retval = ret; |
1480 | goto out_unlock; | 1487 | goto out_unlock; |
1481 | } | 1488 | } |
1482 | 1489 | ||
1483 | map_bh(bh, inode->i_sb, invalid_block); | 1490 | map_bh(bh, inode->i_sb, invalid_block); |
1484 | set_buffer_new(bh); | 1491 | set_buffer_new(bh); |
1485 | set_buffer_delay(bh); | 1492 | set_buffer_delay(bh); |
1486 | } else if (retval > 0) { | 1493 | } else if (retval > 0) { |
1487 | int ret; | 1494 | int ret; |
1488 | unsigned int status; | 1495 | unsigned int status; |
1489 | 1496 | ||
1490 | if (unlikely(retval != map->m_len)) { | 1497 | if (unlikely(retval != map->m_len)) { |
1491 | ext4_warning(inode->i_sb, | 1498 | ext4_warning(inode->i_sb, |
1492 | "ES len assertion failed for inode " | 1499 | "ES len assertion failed for inode " |
1493 | "%lu: retval %d != map->m_len %d", | 1500 | "%lu: retval %d != map->m_len %d", |
1494 | inode->i_ino, retval, map->m_len); | 1501 | inode->i_ino, retval, map->m_len); |
1495 | WARN_ON(1); | 1502 | WARN_ON(1); |
1496 | } | 1503 | } |
1497 | 1504 | ||
1498 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? | 1505 | status = map->m_flags & EXT4_MAP_UNWRITTEN ? |
1499 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; | 1506 | EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; |
1500 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, | 1507 | ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, |
1501 | map->m_pblk, status); | 1508 | map->m_pblk, status); |
1502 | if (ret != 0) | 1509 | if (ret != 0) |
1503 | retval = ret; | 1510 | retval = ret; |
1504 | } | 1511 | } |
1505 | 1512 | ||
1506 | out_unlock: | 1513 | out_unlock: |
1507 | up_read((&EXT4_I(inode)->i_data_sem)); | 1514 | up_read((&EXT4_I(inode)->i_data_sem)); |
1508 | 1515 | ||
1509 | return retval; | 1516 | return retval; |
1510 | } | 1517 | } |
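A worked example, with illustrative numbers only, of the extent-status cache hit arithmetic near the top of this function:

	/*
	 * Suppose the cached extent is es_lblk = 100, es_len = 50,
	 * es_pblk = 5000 and the caller asks for iblock = 120 with
	 * map->m_len = 64. Then map->m_pblk = 5000 + (120 - 100) = 5020,
	 * the extent has 50 - 20 = 30 usable blocks left, and since
	 * 30 < 64 the function trims map->m_len and returns 30.
	 */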
1511 | 1518 | ||
1512 | /* | 1519 | /* |
1513 | * This is a special get_block_t callback which is used by | 1520 | * This is a special get_block_t callback which is used by |
1514 | * ext4_da_write_begin(). It will either return a mapped block or | 1521 | * ext4_da_write_begin(). It will either return a mapped block or |
1515 | * reserve space for a single block. | 1522 | * reserve space for a single block. |
1516 | * | 1523 | * |
1517 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. | 1524 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. |
1518 | * We also have b_blocknr = -1 and b_bdev initialized properly | 1525 | * We also have b_blocknr = -1 and b_bdev initialized properly |
1519 | * | 1526 | * |
1520 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. | 1527 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. |
1521 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev | 1528 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev |
1522 | * initialized properly. | 1529 | * initialized properly. |
1523 | */ | 1530 | */ |
1524 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 1531 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
1525 | struct buffer_head *bh, int create) | 1532 | struct buffer_head *bh, int create) |
1526 | { | 1533 | { |
1527 | struct ext4_map_blocks map; | 1534 | struct ext4_map_blocks map; |
1528 | int ret = 0; | 1535 | int ret = 0; |
1529 | 1536 | ||
1530 | BUG_ON(create == 0); | 1537 | BUG_ON(create == 0); |
1531 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); | 1538 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
1532 | 1539 | ||
1533 | map.m_lblk = iblock; | 1540 | map.m_lblk = iblock; |
1534 | map.m_len = 1; | 1541 | map.m_len = 1; |
1535 | 1542 | ||
1536 | /* | 1543 | /* |
1537 | * first, we need to know whether the block is already allocated; | 1544 | * first, we need to know whether the block is already allocated; |
1538 | * preallocated blocks are unmapped but should be treated | 1545 | * preallocated blocks are unmapped but should be treated |
1539 | * the same as allocated blocks. | 1546 | * the same as allocated blocks. |
1540 | */ | 1547 | */ |
1541 | ret = ext4_da_map_blocks(inode, iblock, &map, bh); | 1548 | ret = ext4_da_map_blocks(inode, iblock, &map, bh); |
1542 | if (ret <= 0) | 1549 | if (ret <= 0) |
1543 | return ret; | 1550 | return ret; |
1544 | 1551 | ||
1545 | map_bh(bh, inode->i_sb, map.m_pblk); | 1552 | map_bh(bh, inode->i_sb, map.m_pblk); |
1546 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1553 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
1547 | 1554 | ||
1548 | if (buffer_unwritten(bh)) { | 1555 | if (buffer_unwritten(bh)) { |
1549 | /* A delayed write to unwritten bh should be marked | 1556 | /* A delayed write to unwritten bh should be marked |
1550 | * new and mapped. Mapped ensures that we don't do | 1557 | * new and mapped. Mapped ensures that we don't do |
1551 | * get_block multiple times when we write to the same | 1558 | * get_block multiple times when we write to the same |
1552 | * offset and new ensures that we do proper zero out | 1559 | * offset and new ensures that we do proper zero out |
1553 | * for partial write. | 1560 | * for partial write. |
1554 | */ | 1561 | */ |
1555 | set_buffer_new(bh); | 1562 | set_buffer_new(bh); |
1556 | set_buffer_mapped(bh); | 1563 | set_buffer_mapped(bh); |
1557 | } | 1564 | } |
1558 | return 0; | 1565 | return 0; |
1559 | } | 1566 | } |
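A condensed sketch of the intended caller; it is not part of the hunk shown here, but ext4_da_write_begin() elsewhere in this file hands the callback to __block_write_begin() roughly like this (page, pos, len and ret come from that function's scope):

	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
	if (ret) {
		unlock_page(page);
		/* cleanup and ENOSPC retry logic condensed */
	}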
1560 | 1567 | ||
1561 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 1568 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
1562 | { | 1569 | { |
1563 | get_bh(bh); | 1570 | get_bh(bh); |
1564 | return 0; | 1571 | return 0; |
1565 | } | 1572 | } |
1566 | 1573 | ||
1567 | static int bput_one(handle_t *handle, struct buffer_head *bh) | 1574 | static int bput_one(handle_t *handle, struct buffer_head *bh) |
1568 | { | 1575 | { |
1569 | put_bh(bh); | 1576 | put_bh(bh); |
1570 | return 0; | 1577 | return 0; |
1571 | } | 1578 | } |
1572 | 1579 | ||
1573 | static int __ext4_journalled_writepage(struct page *page, | 1580 | static int __ext4_journalled_writepage(struct page *page, |
1574 | unsigned int len) | 1581 | unsigned int len) |
1575 | { | 1582 | { |
1576 | struct address_space *mapping = page->mapping; | 1583 | struct address_space *mapping = page->mapping; |
1577 | struct inode *inode = mapping->host; | 1584 | struct inode *inode = mapping->host; |
1578 | struct buffer_head *page_bufs = NULL; | 1585 | struct buffer_head *page_bufs = NULL; |
1579 | handle_t *handle = NULL; | 1586 | handle_t *handle = NULL; |
1580 | int ret = 0, err = 0; | 1587 | int ret = 0, err = 0; |
1581 | int inline_data = ext4_has_inline_data(inode); | 1588 | int inline_data = ext4_has_inline_data(inode); |
1582 | struct buffer_head *inode_bh = NULL; | 1589 | struct buffer_head *inode_bh = NULL; |
1583 | 1590 | ||
1584 | ClearPageChecked(page); | 1591 | ClearPageChecked(page); |
1585 | 1592 | ||
1586 | if (inline_data) { | 1593 | if (inline_data) { |
1587 | BUG_ON(page->index != 0); | 1594 | BUG_ON(page->index != 0); |
1588 | BUG_ON(len > ext4_get_max_inline_size(inode)); | 1595 | BUG_ON(len > ext4_get_max_inline_size(inode)); |
1589 | inode_bh = ext4_journalled_write_inline_data(inode, len, page); | 1596 | inode_bh = ext4_journalled_write_inline_data(inode, len, page); |
1590 | if (inode_bh == NULL) | 1597 | if (inode_bh == NULL) |
1591 | goto out; | 1598 | goto out; |
1592 | } else { | 1599 | } else { |
1593 | page_bufs = page_buffers(page); | 1600 | page_bufs = page_buffers(page); |
1594 | if (!page_bufs) { | 1601 | if (!page_bufs) { |
1595 | BUG(); | 1602 | BUG(); |
1596 | goto out; | 1603 | goto out; |
1597 | } | 1604 | } |
1598 | ext4_walk_page_buffers(handle, page_bufs, 0, len, | 1605 | ext4_walk_page_buffers(handle, page_bufs, 0, len, |
1599 | NULL, bget_one); | 1606 | NULL, bget_one); |
1600 | } | 1607 | } |
1601 | /* As soon as we unlock the page, it can go away, but we have | 1608 | /* As soon as we unlock the page, it can go away, but we have |
1602 | * references to buffers so we are safe */ | 1609 | * references to buffers so we are safe */ |
1603 | unlock_page(page); | 1610 | unlock_page(page); |
1604 | 1611 | ||
1605 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 1612 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, |
1606 | ext4_writepage_trans_blocks(inode)); | 1613 | ext4_writepage_trans_blocks(inode)); |
1607 | if (IS_ERR(handle)) { | 1614 | if (IS_ERR(handle)) { |
1608 | ret = PTR_ERR(handle); | 1615 | ret = PTR_ERR(handle); |
1609 | goto out; | 1616 | goto out; |
1610 | } | 1617 | } |
1611 | 1618 | ||
1612 | BUG_ON(!ext4_handle_valid(handle)); | 1619 | BUG_ON(!ext4_handle_valid(handle)); |
1613 | 1620 | ||
1614 | if (inline_data) { | 1621 | if (inline_data) { |
1615 | BUFFER_TRACE(inode_bh, "get write access"); | 1622 | BUFFER_TRACE(inode_bh, "get write access"); |
1616 | ret = ext4_journal_get_write_access(handle, inode_bh); | 1623 | ret = ext4_journal_get_write_access(handle, inode_bh); |
1617 | 1624 | ||
1618 | err = ext4_handle_dirty_metadata(handle, inode, inode_bh); | 1625 | err = ext4_handle_dirty_metadata(handle, inode, inode_bh); |
1619 | 1626 | ||
1620 | } else { | 1627 | } else { |
1621 | ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1628 | ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1622 | do_journal_get_write_access); | 1629 | do_journal_get_write_access); |
1623 | 1630 | ||
1624 | err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1631 | err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1625 | write_end_fn); | 1632 | write_end_fn); |
1626 | } | 1633 | } |
1627 | if (ret == 0) | 1634 | if (ret == 0) |
1628 | ret = err; | 1635 | ret = err; |
1629 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | 1636 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; |
1630 | err = ext4_journal_stop(handle); | 1637 | err = ext4_journal_stop(handle); |
1631 | if (!ret) | 1638 | if (!ret) |
1632 | ret = err; | 1639 | ret = err; |
1633 | 1640 | ||
1634 | if (!ext4_has_inline_data(inode)) | 1641 | if (!ext4_has_inline_data(inode)) |
1635 | ext4_walk_page_buffers(NULL, page_bufs, 0, len, | 1642 | ext4_walk_page_buffers(NULL, page_bufs, 0, len, |
1636 | NULL, bput_one); | 1643 | NULL, bput_one); |
1637 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 1644 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1638 | out: | 1645 | out: |
1639 | brelse(inode_bh); | 1646 | brelse(inode_bh); |
1640 | return ret; | 1647 | return ret; |
1641 | } | 1648 | } |
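A minimal sketch of the pin/unlock/journal pattern used above, assuming the non-inline-data path; without the bget_one() walk, the page's buffers could vanish as soon as the page lock is dropped:

	ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, bget_one);
	unlock_page(page);	/* the page may now be reclaimed... */
	/* ...but the extra buffer references keep page_bufs valid here,
	 * so the handle can be started and the buffers journalled */
	ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, bput_one);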
1642 | 1649 | ||
1643 | /* | 1650 | /* |
1644 | * Note that we don't need to start a transaction unless we're journaling data | 1651 | * Note that we don't need to start a transaction unless we're journaling data |
1645 | * because we should have holes filled from ext4_page_mkwrite(). We don't even | 1652 | * because we should have holes filled from ext4_page_mkwrite(). We don't even |
1646 | * need to add the inode to the transaction's list in ordered mode because if | 1653 | * need to add the inode to the transaction's list in ordered mode because if |
1647 | * we are writing back data added by write(), the inode is already there and if | 1654 | * we are writing back data added by write(), the inode is already there and if |
1648 | * we are writing back data modified via mmap(), no one guarantees in which | 1655 | * we are writing back data modified via mmap(), no one guarantees in which |
1649 | * transaction the data will hit the disk. In case we are journaling data, we | 1656 | * transaction the data will hit the disk. In case we are journaling data, we |
1650 | * cannot start a transaction directly because a transaction start ranks above | 1657 | * cannot start a transaction directly because a transaction start ranks above |
1651 | * the page lock, so we have to do some magic. | 1658 | * the page lock, so we have to do some magic. |
1652 | * | 1659 | * |
1653 | * This function can get called via... | 1660 | * This function can get called via... |
1654 | * - ext4_writepages after taking page lock (have journal handle) | 1661 | * - ext4_writepages after taking page lock (have journal handle) |
1655 | * - journal_submit_inode_data_buffers (no journal handle) | 1662 | * - journal_submit_inode_data_buffers (no journal handle) |
1656 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) | 1663 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) |
1657 | * - grab_page_cache when doing write_begin (have journal handle) | 1664 | * - grab_page_cache when doing write_begin (have journal handle) |
1658 | * | 1665 | * |
1659 | * We don't do any block allocation in this function. If we have a page with | 1666 | * We don't do any block allocation in this function. If we have a page with |
1660 | * multiple blocks we need to write those buffer_heads that are mapped. This | 1667 | * multiple blocks we need to write those buffer_heads that are mapped. This |
1661 | * is important for mmap-based writes. So if we do the following with blocksize 1K | 1668 | * is important for mmap-based writes. So if we do the following with blocksize 1K |
1662 | * truncate(f, 1024); | 1669 | * truncate(f, 1024); |
1663 | * a = mmap(f, 0, 4096); | 1670 | * a = mmap(f, 0, 4096); |
1664 | * a[0] = 'a'; | 1671 | * a[0] = 'a'; |
1665 | * truncate(f, 4096); | 1672 | * truncate(f, 4096); |
1666 | * we have the first buffer_head in the page mapped via the page_mkwrite callback | 1673 | * we have the first buffer_head in the page mapped via the page_mkwrite callback |
1667 | * but the other buffer_heads would be unmapped yet dirty (dirtied via | 1674 | * but the other buffer_heads would be unmapped yet dirty (dirtied via |
1668 | * do_wp_page). So writepage should write the first block. If we modify | 1675 | * do_wp_page). So writepage should write the first block. If we modify |
1669 | * the mmap area beyond 1024 we will again get a page_fault and the | 1676 | * the mmap area beyond 1024 we will again get a page_fault and the |
1670 | * page_mkwrite callback will do the block allocation and mark the | 1677 | * page_mkwrite callback will do the block allocation and mark the |
1671 | * buffer_heads mapped. | 1678 | * buffer_heads mapped. |
1672 | * | 1679 | * |
1673 | * We redirty the page if it has any buffer_heads that are either delayed | 1680 | * We redirty the page if it has any buffer_heads that are either delayed |
1674 | * or unwritten. | 1681 | * or unwritten. |
1675 | * | 1682 | * |
1676 | * We can get recursively called as shown below. | 1683 | * We can get recursively called as shown below. |
1677 | * | 1684 | * |
1678 | * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | 1685 | * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> |
1679 | * ext4_writepage() | 1686 | * ext4_writepage() |
1680 | * | 1687 | * |
1681 | * But since we don't do any block allocation we should not deadlock. | 1688 | * But since we don't do any block allocation we should not deadlock. |
1682 | * The page also has the dirty flag cleared so we don't get a recursive page_lock. | 1689 | * The page also has the dirty flag cleared so we don't get a recursive page_lock. |
1683 | */ | 1690 | */ |
1684 | static int ext4_writepage(struct page *page, | 1691 | static int ext4_writepage(struct page *page, |
1685 | struct writeback_control *wbc) | 1692 | struct writeback_control *wbc) |
1686 | { | 1693 | { |
1687 | int ret = 0; | 1694 | int ret = 0; |
1688 | loff_t size; | 1695 | loff_t size; |
1689 | unsigned int len; | 1696 | unsigned int len; |
1690 | struct buffer_head *page_bufs = NULL; | 1697 | struct buffer_head *page_bufs = NULL; |
1691 | struct inode *inode = page->mapping->host; | 1698 | struct inode *inode = page->mapping->host; |
1692 | struct ext4_io_submit io_submit; | 1699 | struct ext4_io_submit io_submit; |
1693 | bool keep_towrite = false; | 1700 | bool keep_towrite = false; |
1694 | 1701 | ||
1695 | trace_ext4_writepage(page); | 1702 | trace_ext4_writepage(page); |
1696 | size = i_size_read(inode); | 1703 | size = i_size_read(inode); |
1697 | if (page->index == size >> PAGE_CACHE_SHIFT) | 1704 | if (page->index == size >> PAGE_CACHE_SHIFT) |
1698 | len = size & ~PAGE_CACHE_MASK; | 1705 | len = size & ~PAGE_CACHE_MASK; |
1699 | else | 1706 | else |
1700 | len = PAGE_CACHE_SIZE; | 1707 | len = PAGE_CACHE_SIZE; |
1701 | 1708 | ||
1702 | page_bufs = page_buffers(page); | 1709 | page_bufs = page_buffers(page); |
1703 | /* | 1710 | /* |
1704 | * We cannot do block allocation or other extent handling in this | 1711 | * We cannot do block allocation or other extent handling in this |
1705 | * function. If there are buffers needing that, we have to redirty | 1712 | * function. If there are buffers needing that, we have to redirty |
1706 | * the page. But we may reach here when we do a journal commit via | 1713 | * the page. But we may reach here when we do a journal commit via |
1707 | * journal_submit_inode_data_buffers() and in that case we must write | 1714 | * journal_submit_inode_data_buffers() and in that case we must write |
1708 | * allocated buffers to achieve data=ordered mode guarantees. | 1715 | * allocated buffers to achieve data=ordered mode guarantees. |
1709 | */ | 1716 | */ |
1710 | if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 1717 | if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, |
1711 | ext4_bh_delay_or_unwritten)) { | 1718 | ext4_bh_delay_or_unwritten)) { |
1712 | redirty_page_for_writepage(wbc, page); | 1719 | redirty_page_for_writepage(wbc, page); |
1713 | if (current->flags & PF_MEMALLOC) { | 1720 | if (current->flags & PF_MEMALLOC) { |
1714 | /* | 1721 | /* |
1715 | * For memory cleaning there's no point in writing only | 1722 | * For memory cleaning there's no point in writing only |
1716 | * some buffers. So just bail out. Warn if we came here | 1723 | * some buffers. So just bail out. Warn if we came here |
1717 | * from direct reclaim. | 1724 | * from direct reclaim. |
1718 | */ | 1725 | */ |
1719 | WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) | 1726 | WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) |
1720 | == PF_MEMALLOC); | 1727 | == PF_MEMALLOC); |
1721 | unlock_page(page); | 1728 | unlock_page(page); |
1722 | return 0; | 1729 | return 0; |
1723 | } | 1730 | } |
1724 | keep_towrite = true; | 1731 | keep_towrite = true; |
1725 | } | 1732 | } |
1726 | 1733 | ||
1727 | if (PageChecked(page) && ext4_should_journal_data(inode)) | 1734 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
1728 | /* | 1735 | /* |
1729 | * It's mmapped pagecache. Add buffers and journal it. There | 1736 | * It's mmapped pagecache. Add buffers and journal it. There |
1730 | * doesn't seem much point in redirtying the page here. | 1737 | * doesn't seem much point in redirtying the page here. |
1731 | */ | 1738 | */ |
1732 | return __ext4_journalled_writepage(page, len); | 1739 | return __ext4_journalled_writepage(page, len); |
1733 | 1740 | ||
1734 | ext4_io_submit_init(&io_submit, wbc); | 1741 | ext4_io_submit_init(&io_submit, wbc); |
1735 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | 1742 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); |
1736 | if (!io_submit.io_end) { | 1743 | if (!io_submit.io_end) { |
1737 | redirty_page_for_writepage(wbc, page); | 1744 | redirty_page_for_writepage(wbc, page); |
1738 | unlock_page(page); | 1745 | unlock_page(page); |
1739 | return -ENOMEM; | 1746 | return -ENOMEM; |
1740 | } | 1747 | } |
1741 | ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); | 1748 | ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); |
1742 | ext4_io_submit(&io_submit); | 1749 | ext4_io_submit(&io_submit); |
1743 | /* Drop io_end reference we got from init */ | 1750 | /* Drop io_end reference we got from init */ |
1744 | ext4_put_io_end_defer(io_submit.io_end); | 1751 | ext4_put_io_end_defer(io_submit.io_end); |
1745 | return ret; | 1752 | return ret; |
1746 | } | 1753 | } |
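The truncate/mmap scenario described in the comment above ext4_writepage(), written out as a self-contained userspace sketch; it assumes a filesystem with 1K blocksize and 4K pages, the path "f" is a placeholder, and error checks are omitted for brevity:

	#include <fcntl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("f", O_RDWR | O_CREAT, 0644);
		char *a;

		ftruncate(fd, 1024);	/* file covers one 1K block */
		a = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		a[0] = 'a';		/* page_mkwrite maps only the first buffer_head */
		ftruncate(fd, 4096);	/* the page's other three buffers: unmapped, dirty */
		return 0;
	}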
1747 | 1754 | ||
1748 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | 1755 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) |
1749 | { | 1756 | { |
1750 | int len; | 1757 | int len; |
1751 | loff_t size = i_size_read(mpd->inode); | 1758 | loff_t size = i_size_read(mpd->inode); |
1752 | int err; | 1759 | int err; |
1753 | 1760 | ||
1754 | BUG_ON(page->index != mpd->first_page); | 1761 | BUG_ON(page->index != mpd->first_page); |
1755 | if (page->index == size >> PAGE_CACHE_SHIFT) | 1762 | if (page->index == size >> PAGE_CACHE_SHIFT) |
1756 | len = size & ~PAGE_CACHE_MASK; | 1763 | len = size & ~PAGE_CACHE_MASK; |
1757 | else | 1764 | else |
1758 | len = PAGE_CACHE_SIZE; | 1765 | len = PAGE_CACHE_SIZE; |
1759 | clear_page_dirty_for_io(page); | 1766 | clear_page_dirty_for_io(page); |
1760 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); | 1767 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); |
1761 | if (!err) | 1768 | if (!err) |
1762 | mpd->wbc->nr_to_write--; | 1769 | mpd->wbc->nr_to_write--; |
1763 | mpd->first_page++; | 1770 | mpd->first_page++; |
1764 | 1771 | ||
1765 | return err; | 1772 | return err; |
1766 | } | 1773 | } |
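A worked example of the partial-page length computation above, assuming 4K pages:

	/*
	 * For i_size = 5000, the page at index 1 (bytes 4096..8191) contains
	 * EOF, so len = 5000 & ~PAGE_CACHE_MASK = 904 and only those bytes
	 * are written back; every page before it gets the full
	 * PAGE_CACHE_SIZE.
	 */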
1767 | 1774 | ||
1768 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | 1775 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) |
1769 | 1776 | ||
1770 | /* | 1777 | /* |
1771 | * mballoc gives us at most this number of blocks... | 1778 | * mballoc gives us at most this number of blocks... |
1772 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). | 1779 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
1773 | * The rest of mballoc seems to handle chunks up to full group size. | 1780 | * The rest of mballoc seems to handle chunks up to full group size. |
1774 | */ | 1781 | */ |
1775 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | 1782 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 |
1776 | 1783 | ||
1777 | /* | 1784 | /* |
1778 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map | 1785 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map |
1779 | * | 1786 | * |
1780 | * @mpd - extent of blocks | 1787 | * @mpd - extent of blocks |
1781 | * @lblk - logical number of the block in the file | 1788 | * @lblk - logical number of the block in the file |
1782 | * @bh - buffer head we want to add to the extent | 1789 | * @bh - buffer head we want to add to the extent |
1783 | * | 1790 | * |
1784 | * The function is used to collect contiguous blocks in the same state. If the | 1791 | * The function is used to collect contiguous blocks in the same state. If the |
1785 | * buffer doesn't require mapping for writeback and we haven't started the | 1792 | * buffer doesn't require mapping for writeback and we haven't started the |
1786 | * extent of buffers to map yet, the function returns 'true' immediately - the | 1793 | * extent of buffers to map yet, the function returns 'true' immediately - the |
1787 | * caller can write the buffer right away. Otherwise the function returns true | 1794 | * caller can write the buffer right away. Otherwise the function returns true |
1788 | * if the block has been added to the extent, false if the block couldn't be | 1795 | * if the block has been added to the extent, false if the block couldn't be |
1789 | * added. | 1796 | * added. |
1790 | */ | 1797 | */ |
1791 | static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | 1798 | static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, |
1792 | struct buffer_head *bh) | 1799 | struct buffer_head *bh) |
1793 | { | 1800 | { |
1794 | struct ext4_map_blocks *map = &mpd->map; | 1801 | struct ext4_map_blocks *map = &mpd->map; |
1795 | 1802 | ||
1796 | /* Buffer that doesn't need mapping for writeback? */ | 1803 | /* Buffer that doesn't need mapping for writeback? */ |
1797 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | 1804 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || |
1798 | (!buffer_delay(bh) && !buffer_unwritten(bh))) { | 1805 | (!buffer_delay(bh) && !buffer_unwritten(bh))) { |
1799 | /* So far no extent to map => we write the buffer right away */ | 1806 | /* So far no extent to map => we write the buffer right away */ |
1800 | if (map->m_len == 0) | 1807 | if (map->m_len == 0) |
1801 | return true; | 1808 | return true; |
1802 | return false; | 1809 | return false; |
1803 | } | 1810 | } |
1804 | 1811 | ||
1805 | /* First block in the extent? */ | 1812 | /* First block in the extent? */ |
1806 | if (map->m_len == 0) { | 1813 | if (map->m_len == 0) { |
1807 | map->m_lblk = lblk; | 1814 | map->m_lblk = lblk; |
1808 | map->m_len = 1; | 1815 | map->m_len = 1; |
1809 | map->m_flags = bh->b_state & BH_FLAGS; | 1816 | map->m_flags = bh->b_state & BH_FLAGS; |
1810 | return true; | 1817 | return true; |
1811 | } | 1818 | } |
1812 | 1819 | ||
1813 | /* Don't go larger than mballoc is willing to allocate */ | 1820 | /* Don't go larger than mballoc is willing to allocate */ |
1814 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | 1821 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) |
1815 | return false; | 1822 | return false; |
1816 | 1823 | ||
1817 | /* Can we merge the block to our big extent? */ | 1824 | /* Can we merge the block to our big extent? */ |
1818 | if (lblk == map->m_lblk + map->m_len && | 1825 | if (lblk == map->m_lblk + map->m_len && |
1819 | (bh->b_state & BH_FLAGS) == map->m_flags) { | 1826 | (bh->b_state & BH_FLAGS) == map->m_flags) { |
1820 | map->m_len++; | 1827 | map->m_len++; |
1821 | return true; | 1828 | return true; |
1822 | } | 1829 | } |
1823 | return false; | 1830 | return false; |
1824 | } | 1831 | } |
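A worked example of the merging rules above, with illustrative block numbers:

	/*
	 * Three dirty, mapped buffers at lblk 10, 11, 12, where 10 and 11
	 * are delayed and 12 is unwritten: the call for 10 starts the extent
	 * (m_lblk = 10, m_len = 1, m_flags = 1 << BH_Delay), the call for 11
	 * merges it (m_len = 2), and the call for 12 returns false because
	 * its (b_state & BH_FLAGS) no longer matches m_flags, so the extent
	 * ends there.
	 */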
1825 | 1832 | ||
1826 | /* | 1833 | /* |
1827 | * mpage_process_page_bufs - submit page buffers for IO or add them to extent | 1834 | * mpage_process_page_bufs - submit page buffers for IO or add them to extent |
1828 | * | 1835 | * |
1829 | * @mpd - extent of blocks for mapping | 1836 | * @mpd - extent of blocks for mapping |
1830 | * @head - the first buffer in the page | 1837 | * @head - the first buffer in the page |
1831 | * @bh - buffer we should start processing from | 1838 | * @bh - buffer we should start processing from |
1832 | * @lblk - logical number of the block in the file corresponding to @bh | 1839 | * @lblk - logical number of the block in the file corresponding to @bh |
1833 | * | 1840 | * |
1834 | * Walk through page buffers from @bh up to @head (exclusive) and either submit | 1841 | * Walk through page buffers from @bh up to @head (exclusive) and either submit |
1835 | * the page for IO if all buffers in this page were mapped and there's no | 1842 | * the page for IO if all buffers in this page were mapped and there's no |
1836 | * accumulated extent of buffers to map or add buffers in the page to the | 1843 | * accumulated extent of buffers to map or add buffers in the page to the |
1837 | * extent of buffers to map. The function returns 1 if the caller can continue | 1844 | * extent of buffers to map. The function returns 1 if the caller can continue |
1838 | * by processing the next page, 0 if it should stop adding buffers to the | 1845 | * by processing the next page, 0 if it should stop adding buffers to the |
1839 | * extent to map because we cannot extend it anymore. It can also return a | 1846 | * extent to map because we cannot extend it anymore. It can also return a |
1840 | * value < 0 in case of an error during IO submission. | 1847 | * value < 0 in case of an error during IO submission. |
1841 | */ | 1848 | */ |
1842 | static int mpage_process_page_bufs(struct mpage_da_data *mpd, | 1849 | static int mpage_process_page_bufs(struct mpage_da_data *mpd, |
1843 | struct buffer_head *head, | 1850 | struct buffer_head *head, |
1844 | struct buffer_head *bh, | 1851 | struct buffer_head *bh, |
1845 | ext4_lblk_t lblk) | 1852 | ext4_lblk_t lblk) |
1846 | { | 1853 | { |
1847 | struct inode *inode = mpd->inode; | 1854 | struct inode *inode = mpd->inode; |
1848 | int err; | 1855 | int err; |
1849 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | 1856 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) |
1850 | >> inode->i_blkbits; | 1857 | >> inode->i_blkbits; |
1851 | 1858 | ||
1852 | do { | 1859 | do { |
1853 | BUG_ON(buffer_locked(bh)); | 1860 | BUG_ON(buffer_locked(bh)); |
1854 | 1861 | ||
1855 | if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) { | 1862 | if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) { |
1856 | /* Found extent to map? */ | 1863 | /* Found extent to map? */ |
1857 | if (mpd->map.m_len) | 1864 | if (mpd->map.m_len) |
1858 | return 0; | 1865 | return 0; |
1859 | /* Everything mapped so far and we hit EOF */ | 1866 | /* Everything mapped so far and we hit EOF */ |
1860 | break; | 1867 | break; |
1861 | } | 1868 | } |
1862 | } while (lblk++, (bh = bh->b_this_page) != head); | 1869 | } while (lblk++, (bh = bh->b_this_page) != head); |
1863 | /* So far everything mapped? Submit the page for IO. */ | 1870 | /* So far everything mapped? Submit the page for IO. */ |
1864 | if (mpd->map.m_len == 0) { | 1871 | if (mpd->map.m_len == 0) { |
1865 | err = mpage_submit_page(mpd, head->b_page); | 1872 | err = mpage_submit_page(mpd, head->b_page); |
1866 | if (err < 0) | 1873 | if (err < 0) |
1867 | return err; | 1874 | return err; |
1868 | } | 1875 | } |
1869 | return lblk < blocks; | 1876 | return lblk < blocks; |
1870 | } | 1877 | } |
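A worked example of the EOF rounding at the top of this function, assuming 1K blocks:

	/*
	 * For i_size = 6000, blocks = (6000 + 1023) >> 10 = 6, so lblks 0..5
	 * are inside the file. When the walk reaches lblk 6 with no extent
	 * accumulated, it breaks out, submits the page, and returns 0
	 * because "lblk < blocks" is false.
	 */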
1871 | 1878 | ||
1872 | /* | 1879 | /* |
1873 | * mpage_map_and_submit_buffers - update buffers corresponding to changed extent and | 1880 | * mpage_map_and_submit_buffers - update buffers corresponding to changed extent and |
1874 | * submit fully mapped pages for IO | 1881 | * submit fully mapped pages for IO |
1875 | * | 1882 | * |
1876 | * @mpd - description of extent to map, on return next extent to map | 1883 | * @mpd - description of extent to map, on return next extent to map |
1877 | * | 1884 | * |
1878 | * Scan buffers corresponding to changed extent (we expect corresponding pages | 1885 | * Scan buffers corresponding to changed extent (we expect corresponding pages |
1879 | * to be already locked) and update buffer state according to new extent state. | 1886 | * to be already locked) and update buffer state according to new extent state. |
1880 | * We map delalloc buffers to their physical location, clear unwritten bits, | 1887 | * We map delalloc buffers to their physical location, clear unwritten bits, |
1881 | * and mark buffers as uninit when we perform writes to unwritten extents | 1888 | * and mark buffers as uninit when we perform writes to unwritten extents |
1882 | * and do extent conversion after IO is finished. If the last page is not fully | 1889 | * and do extent conversion after IO is finished. If the last page is not fully |
1883 | * mapped, we update @map to the next extent in the last page that needs | 1890 | * mapped, we update @map to the next extent in the last page that needs |
1884 | * mapping. Otherwise we submit the page for IO. | 1891 | * mapping. Otherwise we submit the page for IO. |
1885 | */ | 1892 | */ |
1886 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | 1893 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) |
1887 | { | 1894 | { |
1888 | struct pagevec pvec; | 1895 | struct pagevec pvec; |
1889 | int nr_pages, i; | 1896 | int nr_pages, i; |
1890 | struct inode *inode = mpd->inode; | 1897 | struct inode *inode = mpd->inode; |
1891 | struct buffer_head *head, *bh; | 1898 | struct buffer_head *head, *bh; |
1892 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | 1899 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; |
1893 | pgoff_t start, end; | 1900 | pgoff_t start, end; |
1894 | ext4_lblk_t lblk; | 1901 | ext4_lblk_t lblk; |
1895 | sector_t pblock; | 1902 | sector_t pblock; |
1896 | int err; | 1903 | int err; |
1897 | 1904 | ||
1898 | start = mpd->map.m_lblk >> bpp_bits; | 1905 | start = mpd->map.m_lblk >> bpp_bits; |
1899 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; | 1906 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; |
1900 | lblk = start << bpp_bits; | 1907 | lblk = start << bpp_bits; |
1901 | pblock = mpd->map.m_pblk; | 1908 | pblock = mpd->map.m_pblk; |
1902 | 1909 | ||
1903 | pagevec_init(&pvec, 0); | 1910 | pagevec_init(&pvec, 0); |
1904 | while (start <= end) { | 1911 | while (start <= end) { |
1905 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, | 1912 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, |
1906 | PAGEVEC_SIZE); | 1913 | PAGEVEC_SIZE); |
1907 | if (nr_pages == 0) | 1914 | if (nr_pages == 0) |
1908 | break; | 1915 | break; |
1909 | for (i = 0; i < nr_pages; i++) { | 1916 | for (i = 0; i < nr_pages; i++) { |
1910 | struct page *page = pvec.pages[i]; | 1917 | struct page *page = pvec.pages[i]; |
1911 | 1918 | ||
1912 | if (page->index > end) | 1919 | if (page->index > end) |
1913 | break; | 1920 | break; |
1914 | /* Up to 'end' pages must be contiguous */ | 1921 | /* Up to 'end' pages must be contiguous */ |
1915 | BUG_ON(page->index != start); | 1922 | BUG_ON(page->index != start); |
1916 | bh = head = page_buffers(page); | 1923 | bh = head = page_buffers(page); |
1917 | do { | 1924 | do { |
1918 | if (lblk < mpd->map.m_lblk) | 1925 | if (lblk < mpd->map.m_lblk) |
1919 | continue; | 1926 | continue; |
1920 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { | 1927 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { |
1921 | /* | 1928 | /* |
1922 | * Buffer after end of mapped extent. | 1929 | * Buffer after end of mapped extent. |
1923 | * Find next buffer in the page to map. | 1930 | * Find next buffer in the page to map. |
1924 | */ | 1931 | */ |
1925 | mpd->map.m_len = 0; | 1932 | mpd->map.m_len = 0; |
1926 | mpd->map.m_flags = 0; | 1933 | mpd->map.m_flags = 0; |
1927 | /* | 1934 | /* |
1928 | * FIXME: If dioread_nolock supports | 1935 | * FIXME: If dioread_nolock supports |
1929 | * blocksize < pagesize, we need to make | 1936 | * blocksize < pagesize, we need to make |
1930 | * sure we add size mapped so far to | 1937 | * sure we add size mapped so far to |
1931 | * io_end->size as the following call | 1938 | * io_end->size as the following call |
1932 | * can submit the page for IO. | 1939 | * can submit the page for IO. |
1933 | */ | 1940 | */ |
1934 | err = mpage_process_page_bufs(mpd, head, | 1941 | err = mpage_process_page_bufs(mpd, head, |
1935 | bh, lblk); | 1942 | bh, lblk); |
1936 | pagevec_release(&pvec); | 1943 | pagevec_release(&pvec); |
1937 | if (err > 0) | 1944 | if (err > 0) |
1938 | err = 0; | 1945 | err = 0; |
1939 | return err; | 1946 | return err; |
1940 | } | 1947 | } |
1941 | if (buffer_delay(bh)) { | 1948 | if (buffer_delay(bh)) { |
1942 | clear_buffer_delay(bh); | 1949 | clear_buffer_delay(bh); |
1943 | bh->b_blocknr = pblock++; | 1950 | bh->b_blocknr = pblock++; |
1944 | } | 1951 | } |
1945 | clear_buffer_unwritten(bh); | 1952 | clear_buffer_unwritten(bh); |
1946 | } while (lblk++, (bh = bh->b_this_page) != head); | 1953 | } while (lblk++, (bh = bh->b_this_page) != head); |
1947 | 1954 | ||
1948 | /* | 1955 | /* |
1949 | * FIXME: This is going to break if dioread_nolock | 1956 | * FIXME: This is going to break if dioread_nolock |
1950 | * supports blocksize < pagesize as we will try to | 1957 | * supports blocksize < pagesize as we will try to |
1951 | * convert potentially unmapped parts of inode. | 1958 | * convert potentially unmapped parts of inode. |
1952 | */ | 1959 | */ |
1953 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; | 1960 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; |
1954 | /* Page fully mapped - let IO run! */ | 1961 | /* Page fully mapped - let IO run! */ |
1955 | err = mpage_submit_page(mpd, page); | 1962 | err = mpage_submit_page(mpd, page); |
1956 | if (err < 0) { | 1963 | if (err < 0) { |
1957 | pagevec_release(&pvec); | 1964 | pagevec_release(&pvec); |
1958 | return err; | 1965 | return err; |
1959 | } | 1966 | } |
1960 | start++; | 1967 | start++; |
1961 | } | 1968 | } |
1962 | pagevec_release(&pvec); | 1969 | pagevec_release(&pvec); |
1963 | } | 1970 | } |
1964 | /* Extent fully mapped and matches with page boundary. We are done. */ | 1971 | /* Extent fully mapped and matches with page boundary. We are done. */ |
1965 | mpd->map.m_len = 0; | 1972 | mpd->map.m_len = 0; |
1966 | mpd->map.m_flags = 0; | 1973 | mpd->map.m_flags = 0; |
1967 | return 0; | 1974 | return 0; |
1968 | } | 1975 | } |
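A worked example of the page-index/logical-block conversion above, assuming 4K pages and 1K blocks:

	/*
	 * bpp_bits = PAGE_CACHE_SHIFT - i_blkbits = 12 - 10 = 2, so an
	 * extent starting at m_lblk = 13 begins in page index 13 >> 2 = 3,
	 * and that page covers lblks 3 << 2 = 12 through 15.
	 */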
1969 | 1976 | ||
1970 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | 1977 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) |
1971 | { | 1978 | { |
1972 | struct inode *inode = mpd->inode; | 1979 | struct inode *inode = mpd->inode; |
1973 | struct ext4_map_blocks *map = &mpd->map; | 1980 | struct ext4_map_blocks *map = &mpd->map; |
1974 | int get_blocks_flags; | 1981 | int get_blocks_flags; |
1975 | int err, dioread_nolock; | 1982 | int err, dioread_nolock; |
1976 | 1983 | ||
1977 | trace_ext4_da_write_pages_extent(inode, map); | 1984 | trace_ext4_da_write_pages_extent(inode, map); |
1978 | /* | 1985 | /* |
1979 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or | 1986 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or |
1980 | * to convert an unwritten extent to be initialized (in the case | 1987 | * to convert an unwritten extent to be initialized (in the case |
1981 | * where we have written into one or more preallocated blocks). It is | 1988 | * where we have written into one or more preallocated blocks). It is |
1982 | * possible that we're going to need more metadata blocks than | 1989 | * possible that we're going to need more metadata blocks than |
1983 | * previously reserved. However we must not fail because we're in | 1990 | * previously reserved. However we must not fail because we're in |
1984 | * writeback and there is nothing we can do about it so it might result | 1991 | * writeback and there is nothing we can do about it so it might result |
1985 | * in data loss. So use reserved blocks to allocate metadata if | 1992 | * in data loss. So use reserved blocks to allocate metadata if |
1986 | * possible. | 1993 | * possible. |
1987 | * | 1994 | * |
1988 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if | 1995 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if |
1989 | * the blocks in question are delalloc blocks. This indicates | 1996 | * the blocks in question are delalloc blocks. This indicates |
1990 | * that the blocks and quotas have already been checked when | 1997 | * that the blocks and quotas have already been checked when |
1991 | * the data was copied into the page cache. | 1998 | * the data was copied into the page cache. |
1992 | */ | 1999 | */ |
1993 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | 2000 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
1994 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | 2001 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
1995 | dioread_nolock = ext4_should_dioread_nolock(inode); | 2002 | dioread_nolock = ext4_should_dioread_nolock(inode); |
1996 | if (dioread_nolock) | 2003 | if (dioread_nolock) |
1997 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | 2004 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; |
1998 | if (map->m_flags & (1 << BH_Delay)) | 2005 | if (map->m_flags & (1 << BH_Delay)) |
1999 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2006 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2000 | 2007 | ||
2001 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); | 2008 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); |
2002 | if (err < 0) | 2009 | if (err < 0) |
2003 | return err; | 2010 | return err; |
2004 | if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) { | 2011 | if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) { |
2005 | if (!mpd->io_submit.io_end->handle && | 2012 | if (!mpd->io_submit.io_end->handle && |
2006 | ext4_handle_valid(handle)) { | 2013 | ext4_handle_valid(handle)) { |
2007 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; | 2014 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; |
2008 | handle->h_rsv_handle = NULL; | 2015 | handle->h_rsv_handle = NULL; |
2009 | } | 2016 | } |
2010 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); | 2017 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); |
2011 | } | 2018 | } |
2012 | 2019 | ||
2013 | BUG_ON(map->m_len == 0); | 2020 | BUG_ON(map->m_len == 0); |
2014 | if (map->m_flags & EXT4_MAP_NEW) { | 2021 | if (map->m_flags & EXT4_MAP_NEW) { |
2015 | struct block_device *bdev = inode->i_sb->s_bdev; | 2022 | struct block_device *bdev = inode->i_sb->s_bdev; |
2016 | int i; | 2023 | int i; |
2017 | 2024 | ||
2018 | for (i = 0; i < map->m_len; i++) | 2025 | for (i = 0; i < map->m_len; i++) |
2019 | unmap_underlying_metadata(bdev, map->m_pblk + i); | 2026 | unmap_underlying_metadata(bdev, map->m_pblk + i); |
2020 | } | 2027 | } |
2021 | return 0; | 2028 | return 0; |
2022 | } | 2029 | } |
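An example of how the flag selection above composes, for a dioread_nolock mount writing over delayed-allocated buffers:

	/*
	 * get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
	 *		      EXT4_GET_BLOCKS_METADATA_NOFAIL |
	 *		      EXT4_GET_BLOCKS_IO_CREATE_EXT |
	 *		      EXT4_GET_BLOCKS_DELALLOC_RESERVE;
	 * i.e. the allocation may dip into reserved metadata blocks, the
	 * extent is created unwritten (converted after IO completes), and
	 * no fresh block/quota reservation is charged for delalloc blocks.
	 */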
2023 | 2030 | ||
2024 | /* | 2031 | /* |
2025 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length | 2032 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length |
2026 | * mpd->len and submit pages underlying it for IO | 2033 | * mpd->len and submit pages underlying it for IO |
2027 | * | 2034 | * |
2028 | * @handle - handle for journal operations | 2035 | * @handle - handle for journal operations |
2029 | * @mpd - extent to map | 2036 | * @mpd - extent to map |
2030 | * @give_up_on_write - we set this to true iff there is a fatal error and there | 2037 | * @give_up_on_write - we set this to true iff there is a fatal error and there |
2031 | * is no hope of writing the data. The caller should discard | 2038 | * is no hope of writing the data. The caller should discard |
2032 | * dirty pages to avoid infinite loops. | 2039 | * dirty pages to avoid infinite loops. |
2033 | * | 2040 | * |
2034 | * The function maps extent starting at mpd->lblk of length mpd->len. If it is | 2041 | * The function maps extent starting at mpd->lblk of length mpd->len. If it is |
2035 | * delayed, blocks are allocated, if it is unwritten, we may need to convert | 2042 | * delayed, blocks are allocated, if it is unwritten, we may need to convert |
2036 | * them to initialized or split the described range from larger unwritten | 2043 | * them to initialized or split the described range from larger unwritten |
2037 | * extent. Note that we need not map all the described range since allocation | 2044 | * extent. Note that we need not map all the described range since allocation |
2038 | * can return fewer blocks or the range is covered by more unwritten extents. We | 2045 | * can return fewer blocks or the range is covered by more unwritten extents. We |
2039 | * cannot map more because we are limited by reserved transaction credits. On | 2046 | * cannot map more because we are limited by reserved transaction credits. On |
2040 | * the other hand we always make sure that the last touched page is fully | 2047 | * the other hand we always make sure that the last touched page is fully |
2041 | * mapped so that it can be written out (and thus forward progress is | 2048 | * mapped so that it can be written out (and thus forward progress is |
2042 | * guaranteed). After mapping we submit all mapped pages for IO. | 2049 | * guaranteed). After mapping we submit all mapped pages for IO. |
2043 | */ | 2050 | */ |
2044 | static int mpage_map_and_submit_extent(handle_t *handle, | 2051 | static int mpage_map_and_submit_extent(handle_t *handle, |
2045 | struct mpage_da_data *mpd, | 2052 | struct mpage_da_data *mpd, |
2046 | bool *give_up_on_write) | 2053 | bool *give_up_on_write) |
2047 | { | 2054 | { |
2048 | struct inode *inode = mpd->inode; | 2055 | struct inode *inode = mpd->inode; |
2049 | struct ext4_map_blocks *map = &mpd->map; | 2056 | struct ext4_map_blocks *map = &mpd->map; |
2050 | int err; | 2057 | int err; |
2051 | loff_t disksize; | 2058 | loff_t disksize; |
2052 | int progress = 0; | 2059 | int progress = 0; |
2053 | 2060 | ||
2054 | mpd->io_submit.io_end->offset = | 2061 | mpd->io_submit.io_end->offset = |
2055 | ((loff_t)map->m_lblk) << inode->i_blkbits; | 2062 | ((loff_t)map->m_lblk) << inode->i_blkbits; |
2056 | do { | 2063 | do { |
2057 | err = mpage_map_one_extent(handle, mpd); | 2064 | err = mpage_map_one_extent(handle, mpd); |
2058 | if (err < 0) { | 2065 | if (err < 0) { |
2059 | struct super_block *sb = inode->i_sb; | 2066 | struct super_block *sb = inode->i_sb; |
2060 | 2067 | ||
2061 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 2068 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) |
2062 | goto invalidate_dirty_pages; | 2069 | goto invalidate_dirty_pages; |
2063 | /* | 2070 | /* |
2064 | * Let the upper layers retry transient errors. | 2071 | * Let the upper layers retry transient errors. |
2065 | * In the case of ENOSPC, if ext4_count_free_blocks() | 2072 | * In the case of ENOSPC, if ext4_count_free_blocks() |
2066 | * is non-zero, a commit should free up blocks. | 2073 | * is non-zero, a commit should free up blocks. |
2067 | */ | 2074 | */ |
2068 | if ((err == -ENOMEM) || | 2075 | if ((err == -ENOMEM) || |
2069 | (err == -ENOSPC && ext4_count_free_clusters(sb))) { | 2076 | (err == -ENOSPC && ext4_count_free_clusters(sb))) { |
2070 | if (progress) | 2077 | if (progress) |
2071 | goto update_disksize; | 2078 | goto update_disksize; |
2072 | return err; | 2079 | return err; |
2073 | } | 2080 | } |
2074 | ext4_msg(sb, KERN_CRIT, | 2081 | ext4_msg(sb, KERN_CRIT, |
2075 | "Delayed block allocation failed for " | 2082 | "Delayed block allocation failed for " |
2076 | "inode %lu at logical offset %llu with" | 2083 | "inode %lu at logical offset %llu with" |
2077 | " max blocks %u with error %d", | 2084 | " max blocks %u with error %d", |
2078 | inode->i_ino, | 2085 | inode->i_ino, |
2079 | (unsigned long long)map->m_lblk, | 2086 | (unsigned long long)map->m_lblk, |
2080 | (unsigned)map->m_len, -err); | 2087 | (unsigned)map->m_len, -err); |
2081 | ext4_msg(sb, KERN_CRIT, | 2088 | ext4_msg(sb, KERN_CRIT, |
2082 | "This should not happen!! Data will " | 2089 | "This should not happen!! Data will " |
2083 | "be lost\n"); | 2090 | "be lost\n"); |
2084 | if (err == -ENOSPC) | 2091 | if (err == -ENOSPC) |
2085 | ext4_print_free_blocks(inode); | 2092 | ext4_print_free_blocks(inode); |
2086 | invalidate_dirty_pages: | 2093 | invalidate_dirty_pages: |
2087 | *give_up_on_write = true; | 2094 | *give_up_on_write = true; |
2088 | return err; | 2095 | return err; |
2089 | } | 2096 | } |
2090 | progress = 1; | 2097 | progress = 1; |
2091 | /* | 2098 | /* |
2092 | * Update buffer state, submit mapped pages, and get us new | 2099 | * Update buffer state, submit mapped pages, and get us new |
2093 | * extent to map | 2100 | * extent to map |
2094 | */ | 2101 | */ |
2095 | err = mpage_map_and_submit_buffers(mpd); | 2102 | err = mpage_map_and_submit_buffers(mpd); |
2096 | if (err < 0) | 2103 | if (err < 0) |
2097 | goto update_disksize; | 2104 | goto update_disksize; |
2098 | } while (map->m_len); | 2105 | } while (map->m_len); |
2099 | 2106 | ||
2100 | update_disksize: | 2107 | update_disksize: |
2101 | /* | 2108 | /* |
2102 | * Update on-disk size after IO is submitted. Races with | 2109 | * Update on-disk size after IO is submitted. Races with |
2103 | * truncate are avoided by checking i_size under i_data_sem. | 2110 | * truncate are avoided by checking i_size under i_data_sem. |
2104 | */ | 2111 | */ |
2105 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | 2112 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; |
2106 | if (disksize > EXT4_I(inode)->i_disksize) { | 2113 | if (disksize > EXT4_I(inode)->i_disksize) { |
2107 | int err2; | 2114 | int err2; |
2108 | loff_t i_size; | 2115 | loff_t i_size; |
2109 | 2116 | ||
2110 | down_write(&EXT4_I(inode)->i_data_sem); | 2117 | down_write(&EXT4_I(inode)->i_data_sem); |
2111 | i_size = i_size_read(inode); | 2118 | i_size = i_size_read(inode); |
2112 | if (disksize > i_size) | 2119 | if (disksize > i_size) |
2113 | disksize = i_size; | 2120 | disksize = i_size; |
2114 | if (disksize > EXT4_I(inode)->i_disksize) | 2121 | if (disksize > EXT4_I(inode)->i_disksize) |
2115 | EXT4_I(inode)->i_disksize = disksize; | 2122 | EXT4_I(inode)->i_disksize = disksize; |
2116 | err2 = ext4_mark_inode_dirty(handle, inode); | 2123 | err2 = ext4_mark_inode_dirty(handle, inode); |
2117 | up_write(&EXT4_I(inode)->i_data_sem); | 2124 | up_write(&EXT4_I(inode)->i_data_sem); |
2118 | if (err2) | 2125 | if (err2) |
2119 | ext4_error(inode->i_sb, | 2126 | ext4_error(inode->i_sb, |
2120 | "Failed to mark inode %lu dirty", | 2127 | "Failed to mark inode %lu dirty", |
2121 | inode->i_ino); | 2128 | inode->i_ino); |
2122 | if (!err) | 2129 | if (!err) |
2123 | err = err2; | 2130 | err = err2; |
2124 | } | 2131 | } |
2125 | return err; | 2132 | return err; |
2126 | } | 2133 | } |
2127 | 2134 | ||
2128 | /* | 2135 | /* |
2129 | * Calculate the total number of credits to reserve for one writepages | 2136 | * Calculate the total number of credits to reserve for one writepages |
2130 | * iteration. This is called from ext4_writepages(). We map an extent of | 2137 | * iteration. This is called from ext4_writepages(). We map an extent of |
2131 | * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | 2138 | * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping |
2132 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + | 2139 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + |
2133 | * bpp - 1 blocks in bpp different extents. | 2140 | * bpp - 1 blocks in bpp different extents. |
2134 | */ | 2141 | */ |
2135 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | 2142 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2136 | { | 2143 | { |
2137 | int bpp = ext4_journal_blocks_per_page(inode); | 2144 | int bpp = ext4_journal_blocks_per_page(inode); |
2138 | 2145 | ||
2139 | return ext4_meta_trans_blocks(inode, | 2146 | return ext4_meta_trans_blocks(inode, |
2140 | MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); | 2147 | MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); |
2141 | } | 2148 | } |
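A worked example of the credit estimate above:

	/*
	 * With 1K blocks on 4K pages, bpp = 4, so the transaction is sized
	 * for MAX_WRITEPAGES_EXTENT_LEN + 4 - 1 = 2051 blocks in up to 4
	 * separate extents; with 4K blocks, bpp = 1 and the estimate is
	 * simply 2048 blocks in a single extent.
	 */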
2142 | 2149 | ||
2143 | /* | 2150 | /* |
2144 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages | 2151 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages |
2145 | * and underlying extent to map | 2152 | * and underlying extent to map |
2146 | * | 2153 | * |
2147 | * @mpd - where to look for pages | 2154 | * @mpd - where to look for pages |
2148 | * | 2155 | * |
2149 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for | 2156 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for |
2150 | * IO immediately. When we find a page which isn't mapped we start accumulating | 2157 | * IO immediately. When we find a page which isn't mapped we start accumulating |
2151 | * an extent of buffers underlying these pages that need mapping (formed by | 2158 | * an extent of buffers underlying these pages that need mapping (formed by |
2152 | * either delayed or unwritten buffers). We also lock the pages containing | 2159 | * either delayed or unwritten buffers). We also lock the pages containing |
2153 | * these buffers. The extent found is returned in @mpd structure (starting at | 2160 | * these buffers. The extent found is returned in @mpd structure (starting at |
2154 | * mpd->lblk with length mpd->len blocks). | 2161 | * mpd->lblk with length mpd->len blocks). |
2155 | * | 2162 | * |
2156 | * Note that this function can attach bios to one io_end structure which are | 2163 | * Note that this function can attach bios to one io_end structure which are |
2157 | * neither logically nor physically contiguous. Although it may seem like an | 2164 | * neither logically nor physically contiguous. Although it may seem like an |
2158 | * unnecessary complication, it is actually inevitable in blocksize < pagesize | 2165 | * unnecessary complication, it is actually inevitable in blocksize < pagesize |
2159 | * case as we need to track IO to all buffers underlying a page in one io_end. | 2166 | * case as we need to track IO to all buffers underlying a page in one io_end. |
2160 | */ | 2167 | */ |
2161 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | 2168 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) |
2162 | { | 2169 | { |
2163 | struct address_space *mapping = mpd->inode->i_mapping; | 2170 | struct address_space *mapping = mpd->inode->i_mapping; |
2164 | struct pagevec pvec; | 2171 | struct pagevec pvec; |
2165 | unsigned int nr_pages; | 2172 | unsigned int nr_pages; |
2166 | long left = mpd->wbc->nr_to_write; | 2173 | long left = mpd->wbc->nr_to_write; |
2167 | pgoff_t index = mpd->first_page; | 2174 | pgoff_t index = mpd->first_page; |
2168 | pgoff_t end = mpd->last_page; | 2175 | pgoff_t end = mpd->last_page; |
2169 | int tag; | 2176 | int tag; |
2170 | int i, err = 0; | 2177 | int i, err = 0; |
2171 | int blkbits = mpd->inode->i_blkbits; | 2178 | int blkbits = mpd->inode->i_blkbits; |
2172 | ext4_lblk_t lblk; | 2179 | ext4_lblk_t lblk; |
2173 | struct buffer_head *head; | 2180 | struct buffer_head *head; |
2174 | 2181 | ||
2175 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) | 2182 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) |
2176 | tag = PAGECACHE_TAG_TOWRITE; | 2183 | tag = PAGECACHE_TAG_TOWRITE; |
2177 | else | 2184 | else |
2178 | tag = PAGECACHE_TAG_DIRTY; | 2185 | tag = PAGECACHE_TAG_DIRTY; |
2179 | 2186 | ||
2180 | pagevec_init(&pvec, 0); | 2187 | pagevec_init(&pvec, 0); |
2181 | mpd->map.m_len = 0; | 2188 | mpd->map.m_len = 0; |
2182 | mpd->next_page = index; | 2189 | mpd->next_page = index; |
2183 | while (index <= end) { | 2190 | while (index <= end) { |
2184 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2191 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2185 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2192 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2186 | if (nr_pages == 0) | 2193 | if (nr_pages == 0) |
2187 | goto out; | 2194 | goto out; |
2188 | 2195 | ||
2189 | for (i = 0; i < nr_pages; i++) { | 2196 | for (i = 0; i < nr_pages; i++) { |
2190 | struct page *page = pvec.pages[i]; | 2197 | struct page *page = pvec.pages[i]; |
2191 | 2198 | ||
2192 | /* | 2199 | /* |
2193 | * At this point, the page may be truncated or | 2200 | * At this point, the page may be truncated or |
2194 | * invalidated (changing page->mapping to NULL), or | 2201 | * invalidated (changing page->mapping to NULL), or |
2195 | * even swizzled back from swapper_space to tmpfs file | 2202 | * even swizzled back from swapper_space to tmpfs file |
2196 | * mapping. However, page->index will not change | 2203 | * mapping. However, page->index will not change |
2197 | * because we have a reference on the page. | 2204 | * because we have a reference on the page. |
2198 | */ | 2205 | */ |
2199 | if (page->index > end) | 2206 | if (page->index > end) |
2200 | goto out; | 2207 | goto out; |
2201 | 2208 | ||
2202 | /* | 2209 | /* |
2203 | * Accumulated enough dirty pages? This doesn't apply | 2210 | * Accumulated enough dirty pages? This doesn't apply |
2204 | * to WB_SYNC_ALL mode. For integrity sync we have to | 2211 | * to WB_SYNC_ALL mode. For integrity sync we have to |
2205 | * keep going because someone may be concurrently | 2212 | * keep going because someone may be concurrently |
2206 | * dirtying pages, and we might have synced a lot of | 2213 | * dirtying pages, and we might have synced a lot of |
2207 | * newly appeared dirty pages, but have not synced all | 2214 | * newly appeared dirty pages, but have not synced all |
2208 | * of the old dirty pages. | 2215 | * of the old dirty pages. |
2209 | */ | 2216 | */ |
2210 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) | 2217 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) |
2211 | goto out; | 2218 | goto out; |
2212 | 2219 | ||
2213 | /* If we can't merge this page, we are done. */ | 2220 | /* If we can't merge this page, we are done. */ |
2214 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) | 2221 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) |
2215 | goto out; | 2222 | goto out; |
2216 | 2223 | ||
2217 | lock_page(page); | 2224 | lock_page(page); |
2218 | /* | 2225 | /* |
2219 | * If the page is no longer dirty, or its mapping no | 2226 | * If the page is no longer dirty, or its mapping no |
2220 | * longer corresponds to the inode we are writing (which | 2227 | * longer corresponds to the inode we are writing (which |
2221 | * means it has been truncated or invalidated), or the | 2228 | * means it has been truncated or invalidated), or the |
2222 | * page is already under writeback and we are not doing | 2229 | * page is already under writeback and we are not doing |
2223 | * a data integrity writeback, skip the page | 2230 | * a data integrity writeback, skip the page |
2224 | */ | 2231 | */ |
2225 | if (!PageDirty(page) || | 2232 | if (!PageDirty(page) || |
2226 | (PageWriteback(page) && | 2233 | (PageWriteback(page) && |
2227 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || | 2234 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || |
2228 | unlikely(page->mapping != mapping)) { | 2235 | unlikely(page->mapping != mapping)) { |
2229 | unlock_page(page); | 2236 | unlock_page(page); |
2230 | continue; | 2237 | continue; |
2231 | } | 2238 | } |
2232 | 2239 | ||
2233 | wait_on_page_writeback(page); | 2240 | wait_on_page_writeback(page); |
2234 | BUG_ON(PageWriteback(page)); | 2241 | BUG_ON(PageWriteback(page)); |
2235 | 2242 | ||
2236 | if (mpd->map.m_len == 0) | 2243 | if (mpd->map.m_len == 0) |
2237 | mpd->first_page = page->index; | 2244 | mpd->first_page = page->index; |
2238 | mpd->next_page = page->index + 1; | 2245 | mpd->next_page = page->index + 1; |
2239 | /* Add all dirty buffers to mpd */ | 2246 | /* Add all dirty buffers to mpd */ |
2240 | lblk = ((ext4_lblk_t)page->index) << | 2247 | lblk = ((ext4_lblk_t)page->index) << |
2241 | (PAGE_CACHE_SHIFT - blkbits); | 2248 | (PAGE_CACHE_SHIFT - blkbits); |
2242 | head = page_buffers(page); | 2249 | head = page_buffers(page); |
2243 | err = mpage_process_page_bufs(mpd, head, head, lblk); | 2250 | err = mpage_process_page_bufs(mpd, head, head, lblk); |
2244 | if (err <= 0) | 2251 | if (err <= 0) |
2245 | goto out; | 2252 | goto out; |
2246 | err = 0; | 2253 | err = 0; |
2247 | left--; | 2254 | left--; |
2248 | } | 2255 | } |
2249 | pagevec_release(&pvec); | 2256 | pagevec_release(&pvec); |
2250 | cond_resched(); | 2257 | cond_resched(); |
2251 | } | 2258 | } |
2252 | return 0; | 2259 | return 0; |
2253 | out: | 2260 | out: |
2254 | pagevec_release(&pvec); | 2261 | pagevec_release(&pvec); |
2255 | return err; | 2262 | return err; |
2256 | } | 2263 | } |
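
The accumulation rule in the loop above is the mpd->next_page != page->index test: pages join the extent only while their indices stay contiguous, and the first gap ends it. A toy model with a made-up list of dirty page indices:

        #include <stdio.h>

        int main(void)
        {
                unsigned long dirty[] = { 7, 8, 9, 12, 13 };  /* tagged page indices */
                unsigned long first_page = 0, next_page = 0;
                unsigned len = 0;

                for (unsigned i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++) {
                        if (len > 0 && dirty[i] != next_page)
                                break;                  /* can't merge: stop here */
                        if (len == 0)
                                first_page = dirty[i];
                        next_page = dirty[i] + 1;
                        len++;
                }
                printf("extent: pages %lu..%lu\n", first_page, next_page - 1); /* 7..9 */
                return 0;
        }
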
2257 | 2264 | ||
2258 | static int __writepage(struct page *page, struct writeback_control *wbc, | 2265 | static int __writepage(struct page *page, struct writeback_control *wbc, |
2259 | void *data) | 2266 | void *data) |
2260 | { | 2267 | { |
2261 | struct address_space *mapping = data; | 2268 | struct address_space *mapping = data; |
2262 | int ret = ext4_writepage(page, wbc); | 2269 | int ret = ext4_writepage(page, wbc); |
2263 | mapping_set_error(mapping, ret); | 2270 | mapping_set_error(mapping, ret); |
2264 | return ret; | 2271 | return ret; |
2265 | } | 2272 | } |
2266 | 2273 | ||
2267 | static int ext4_writepages(struct address_space *mapping, | 2274 | static int ext4_writepages(struct address_space *mapping, |
2268 | struct writeback_control *wbc) | 2275 | struct writeback_control *wbc) |
2269 | { | 2276 | { |
2270 | pgoff_t writeback_index = 0; | 2277 | pgoff_t writeback_index = 0; |
2271 | long nr_to_write = wbc->nr_to_write; | 2278 | long nr_to_write = wbc->nr_to_write; |
2272 | int range_whole = 0; | 2279 | int range_whole = 0; |
2273 | int cycled = 1; | 2280 | int cycled = 1; |
2274 | handle_t *handle = NULL; | 2281 | handle_t *handle = NULL; |
2275 | struct mpage_da_data mpd; | 2282 | struct mpage_da_data mpd; |
2276 | struct inode *inode = mapping->host; | 2283 | struct inode *inode = mapping->host; |
2277 | int needed_blocks, rsv_blocks = 0, ret = 0; | 2284 | int needed_blocks, rsv_blocks = 0, ret = 0; |
2278 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2285 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2279 | bool done; | 2286 | bool done; |
2280 | struct blk_plug plug; | 2287 | struct blk_plug plug; |
2281 | bool give_up_on_write = false; | 2288 | bool give_up_on_write = false; |
2282 | 2289 | ||
2283 | trace_ext4_writepages(inode, wbc); | 2290 | trace_ext4_writepages(inode, wbc); |
2284 | 2291 | ||
2285 | /* | 2292 | /* |
2286 | * No pages to write? This is mainly a kludge to avoid starting | 2293 | * No pages to write? This is mainly a kludge to avoid starting |
2287 | * a transaction for special inodes like the journal inode on last iput() | 2294 | * a transaction for special inodes like the journal inode on last iput() |
2288 | * because that could violate lock ordering on umount | 2295 | * because that could violate lock ordering on umount |
2289 | */ | 2296 | */ |
2290 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 2297 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2291 | goto out_writepages; | 2298 | goto out_writepages; |
2292 | 2299 | ||
2293 | if (ext4_should_journal_data(inode)) { | 2300 | if (ext4_should_journal_data(inode)) { |
2294 | struct blk_plug plug; | 2301 | struct blk_plug plug; |
2295 | 2302 | ||
2296 | blk_start_plug(&plug); | 2303 | blk_start_plug(&plug); |
2297 | ret = write_cache_pages(mapping, wbc, __writepage, mapping); | 2304 | ret = write_cache_pages(mapping, wbc, __writepage, mapping); |
2298 | blk_finish_plug(&plug); | 2305 | blk_finish_plug(&plug); |
2299 | goto out_writepages; | 2306 | goto out_writepages; |
2300 | } | 2307 | } |
2301 | 2308 | ||
2302 | /* | 2309 | /* |
2303 | * If the filesystem has aborted, it is read-only, so return | 2310 | * If the filesystem has aborted, it is read-only, so return |
2304 | * right away instead of dumping stack traces later on that | 2311 | * right away instead of dumping stack traces later on that |
2305 | * will obscure the real source of the problem. We test | 2312 | * will obscure the real source of the problem. We test |
2306 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because | 2313 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because |
2307 | * the latter could be true if the filesystem is mounted | 2314 | * the latter could be true if the filesystem is mounted |
2308 | * read-only, and in that case, ext4_writepages should | 2315 | * read-only, and in that case, ext4_writepages should |
2309 | * *never* be called, so if that ever happens, we would want | 2316 | * *never* be called, so if that ever happens, we would want |
2310 | * the stack trace. | 2317 | * the stack trace. |
2311 | */ | 2318 | */ |
2312 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { | 2319 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { |
2313 | ret = -EROFS; | 2320 | ret = -EROFS; |
2314 | goto out_writepages; | 2321 | goto out_writepages; |
2315 | } | 2322 | } |
2316 | 2323 | ||
2317 | if (ext4_should_dioread_nolock(inode)) { | 2324 | if (ext4_should_dioread_nolock(inode)) { |
2318 | /* | 2325 | /* |
2319 | * We may need to convert up to one extent per block in | 2326 | * We may need to convert up to one extent per block in |
2320 | * the page and we may dirty the inode. | 2327 | * the page and we may dirty the inode. |
2321 | */ | 2328 | */ |
2322 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | 2329 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); |
2323 | } | 2330 | } |
2324 | 2331 | ||
2325 | /* | 2332 | /* |
2326 | * If we have inline data and arrive here, it means that | 2333 | * If we have inline data and arrive here, it means that |
2327 | * we will soon create the block for the 1st page, so | 2334 | * we will soon create the block for the 1st page, so |
2328 | * we'd better clear the inline data here. | 2335 | * we'd better clear the inline data here. |
2329 | */ | 2336 | */ |
2330 | if (ext4_has_inline_data(inode)) { | 2337 | if (ext4_has_inline_data(inode)) { |
2331 | /* Just inode will be modified... */ | 2338 | /* Just inode will be modified... */ |
2332 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | 2339 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); |
2333 | if (IS_ERR(handle)) { | 2340 | if (IS_ERR(handle)) { |
2334 | ret = PTR_ERR(handle); | 2341 | ret = PTR_ERR(handle); |
2335 | goto out_writepages; | 2342 | goto out_writepages; |
2336 | } | 2343 | } |
2337 | BUG_ON(ext4_test_inode_state(inode, | 2344 | BUG_ON(ext4_test_inode_state(inode, |
2338 | EXT4_STATE_MAY_INLINE_DATA)); | 2345 | EXT4_STATE_MAY_INLINE_DATA)); |
2339 | ext4_destroy_inline_data(handle, inode); | 2346 | ext4_destroy_inline_data(handle, inode); |
2340 | ext4_journal_stop(handle); | 2347 | ext4_journal_stop(handle); |
2341 | } | 2348 | } |
2342 | 2349 | ||
2343 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2350 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2344 | range_whole = 1; | 2351 | range_whole = 1; |
2345 | 2352 | ||
2346 | if (wbc->range_cyclic) { | 2353 | if (wbc->range_cyclic) { |
2347 | writeback_index = mapping->writeback_index; | 2354 | writeback_index = mapping->writeback_index; |
2348 | if (writeback_index) | 2355 | if (writeback_index) |
2349 | cycled = 0; | 2356 | cycled = 0; |
2350 | mpd.first_page = writeback_index; | 2357 | mpd.first_page = writeback_index; |
2351 | mpd.last_page = -1; | 2358 | mpd.last_page = -1; |
2352 | } else { | 2359 | } else { |
2353 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; | 2360 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; |
2354 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; | 2361 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; |
2355 | } | 2362 | } |
2356 | 2363 | ||
2357 | mpd.inode = inode; | 2364 | mpd.inode = inode; |
2358 | mpd.wbc = wbc; | 2365 | mpd.wbc = wbc; |
2359 | ext4_io_submit_init(&mpd.io_submit, wbc); | 2366 | ext4_io_submit_init(&mpd.io_submit, wbc); |
2360 | retry: | 2367 | retry: |
2361 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2368 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2362 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); | 2369 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); |
2363 | done = false; | 2370 | done = false; |
2364 | blk_start_plug(&plug); | 2371 | blk_start_plug(&plug); |
2365 | while (!done && mpd.first_page <= mpd.last_page) { | 2372 | while (!done && mpd.first_page <= mpd.last_page) { |
2366 | /* For each extent of pages we use new io_end */ | 2373 | /* For each extent of pages we use new io_end */ |
2367 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); | 2374 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); |
2368 | if (!mpd.io_submit.io_end) { | 2375 | if (!mpd.io_submit.io_end) { |
2369 | ret = -ENOMEM; | 2376 | ret = -ENOMEM; |
2370 | break; | 2377 | break; |
2371 | } | 2378 | } |
2372 | 2379 | ||
2373 | /* | 2380 | /* |
2374 | * We have two constraints: We find one extent to map and we | 2381 | * We have two constraints: We find one extent to map and we |
2375 | * must always write out the whole page (makes a difference when | 2382 | * must always write out the whole page (makes a difference when |
2376 | * blocksize < pagesize) so that we don't block on IO when we | 2383 | * blocksize < pagesize) so that we don't block on IO when we |
2377 | * try to write out the rest of the page. Journalled mode is | 2384 | * try to write out the rest of the page. Journalled mode is |
2378 | * not supported by delalloc. | 2385 | * not supported by delalloc. |
2379 | */ | 2386 | */ |
2380 | BUG_ON(ext4_should_journal_data(inode)); | 2387 | BUG_ON(ext4_should_journal_data(inode)); |
2381 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2388 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2382 | 2389 | ||
2383 | /* start a new transaction */ | 2390 | /* start a new transaction */ |
2384 | handle = ext4_journal_start_with_reserve(inode, | 2391 | handle = ext4_journal_start_with_reserve(inode, |
2385 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); | 2392 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); |
2386 | if (IS_ERR(handle)) { | 2393 | if (IS_ERR(handle)) { |
2387 | ret = PTR_ERR(handle); | 2394 | ret = PTR_ERR(handle); |
2388 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2395 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2389 | "%ld pages, ino %lu; err %d", __func__, | 2396 | "%ld pages, ino %lu; err %d", __func__, |
2390 | wbc->nr_to_write, inode->i_ino, ret); | 2397 | wbc->nr_to_write, inode->i_ino, ret); |
2391 | /* Release allocated io_end */ | 2398 | /* Release allocated io_end */ |
2392 | ext4_put_io_end(mpd.io_submit.io_end); | 2399 | ext4_put_io_end(mpd.io_submit.io_end); |
2393 | break; | 2400 | break; |
2394 | } | 2401 | } |
2395 | 2402 | ||
2396 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); | 2403 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); |
2397 | ret = mpage_prepare_extent_to_map(&mpd); | 2404 | ret = mpage_prepare_extent_to_map(&mpd); |
2398 | if (!ret) { | 2405 | if (!ret) { |
2399 | if (mpd.map.m_len) | 2406 | if (mpd.map.m_len) |
2400 | ret = mpage_map_and_submit_extent(handle, &mpd, | 2407 | ret = mpage_map_and_submit_extent(handle, &mpd, |
2401 | &give_up_on_write); | 2408 | &give_up_on_write); |
2402 | else { | 2409 | else { |
2403 | /* | 2410 | /* |
2404 | * We scanned the whole range (or exhausted | 2411 | * We scanned the whole range (or exhausted |
2405 | * nr_to_write), submitted what was mapped and | 2412 | * nr_to_write), submitted what was mapped and |
2406 | * didn't find anything needing mapping. We are | 2413 | * didn't find anything needing mapping. We are |
2407 | * done. | 2414 | * done. |
2408 | */ | 2415 | */ |
2409 | done = true; | 2416 | done = true; |
2410 | } | 2417 | } |
2411 | } | 2418 | } |
2412 | ext4_journal_stop(handle); | 2419 | ext4_journal_stop(handle); |
2413 | /* Submit prepared bio */ | 2420 | /* Submit prepared bio */ |
2414 | ext4_io_submit(&mpd.io_submit); | 2421 | ext4_io_submit(&mpd.io_submit); |
2415 | /* Unlock pages we didn't use */ | 2422 | /* Unlock pages we didn't use */ |
2416 | mpage_release_unused_pages(&mpd, give_up_on_write); | 2423 | mpage_release_unused_pages(&mpd, give_up_on_write); |
2417 | /* Drop our io_end reference we got from init */ | 2424 | /* Drop our io_end reference we got from init */ |
2418 | ext4_put_io_end(mpd.io_submit.io_end); | 2425 | ext4_put_io_end(mpd.io_submit.io_end); |
2419 | 2426 | ||
2420 | if (ret == -ENOSPC && sbi->s_journal) { | 2427 | if (ret == -ENOSPC && sbi->s_journal) { |
2421 | /* | 2428 | /* |
2422 | * Commit the transaction which would | 2429 | * Commit the transaction which would |
2423 | * free blocks released in the transaction | 2430 | * free blocks released in the transaction |
2424 | * and try again | 2431 | * and try again |
2425 | */ | 2432 | */ |
2426 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2433 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2427 | ret = 0; | 2434 | ret = 0; |
2428 | continue; | 2435 | continue; |
2429 | } | 2436 | } |
2430 | /* Fatal error - ENOMEM, EIO... */ | 2437 | /* Fatal error - ENOMEM, EIO... */ |
2431 | if (ret) | 2438 | if (ret) |
2432 | break; | 2439 | break; |
2433 | } | 2440 | } |
2434 | blk_finish_plug(&plug); | 2441 | blk_finish_plug(&plug); |
2435 | if (!ret && !cycled && wbc->nr_to_write > 0) { | 2442 | if (!ret && !cycled && wbc->nr_to_write > 0) { |
2436 | cycled = 1; | 2443 | cycled = 1; |
2437 | mpd.last_page = writeback_index - 1; | 2444 | mpd.last_page = writeback_index - 1; |
2438 | mpd.first_page = 0; | 2445 | mpd.first_page = 0; |
2439 | goto retry; | 2446 | goto retry; |
2440 | } | 2447 | } |
2441 | 2448 | ||
2442 | /* Update index */ | 2449 | /* Update index */ |
2443 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2450 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
2444 | /* | 2451 | /* |
2445 | * Set the writeback_index so that range_cyclic | 2452 | * Set the writeback_index so that range_cyclic |
2446 | * mode will write it back later | 2453 | * mode will write it back later |
2447 | */ | 2454 | */ |
2448 | mapping->writeback_index = mpd.first_page; | 2455 | mapping->writeback_index = mpd.first_page; |
2449 | 2456 | ||
2450 | out_writepages: | 2457 | out_writepages: |
2451 | trace_ext4_writepages_result(inode, wbc, ret, | 2458 | trace_ext4_writepages_result(inode, wbc, ret, |
2452 | nr_to_write - wbc->nr_to_write); | 2459 | nr_to_write - wbc->nr_to_write); |
2453 | return ret; | 2460 | return ret; |
2454 | } | 2461 | } |
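
The retry label and the cycled flag above implement the usual two-pass range_cyclic scan: start at mapping->writeback_index, run to EOF, then wrap once to cover the pages before the starting point. A compact sketch of that wraparound, with scan_range() standing in for one pass of the writeback loop:

        #include <stdbool.h>
        #include <stdio.h>

        static void scan_range(unsigned long first, unsigned long last)
        {
                printf("writing back pages %lu..%lu\n", first, last);
        }

        int main(void)
        {
                unsigned long writeback_index = 100;  /* where the last call stopped */
                unsigned long last_page = ~0UL;       /* "up to EOF" */
                bool cycled = (writeback_index == 0);

                scan_range(writeback_index, last_page);
                if (!cycled) {                        /* wrap around once */
                        cycled = true;
                        scan_range(0, writeback_index - 1);
                }
                return 0;
        }
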
2455 | 2462 | ||
2456 | static int ext4_nonda_switch(struct super_block *sb) | 2463 | static int ext4_nonda_switch(struct super_block *sb) |
2457 | { | 2464 | { |
2458 | s64 free_clusters, dirty_clusters; | 2465 | s64 free_clusters, dirty_clusters; |
2459 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2466 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2460 | 2467 | ||
2461 | /* | 2468 | /* |
2462 | * switch to non delalloc mode if we are running low | 2469 | * switch to non delalloc mode if we are running low |
2463 | * on free blocks. The free block accounting via percpu | 2470 | * on free blocks. The free block accounting via percpu |
2464 | * counters can get slightly wrong with percpu_counter_batch getting | 2471 | * counters can get slightly wrong with percpu_counter_batch getting |
2465 | * accumulated on each CPU without updating global counters. | 2472 | * accumulated on each CPU without updating global counters. |
2466 | * Delalloc needs accurate free block accounting, so switch | 2473 | * Delalloc needs accurate free block accounting, so switch |
2467 | * to non delalloc when we are near the error range. | 2474 | * to non delalloc when we are near the error range. |
2468 | */ | 2475 | */ |
2469 | free_clusters = | 2476 | free_clusters = |
2470 | percpu_counter_read_positive(&sbi->s_freeclusters_counter); | 2477 | percpu_counter_read_positive(&sbi->s_freeclusters_counter); |
2471 | dirty_clusters = | 2478 | dirty_clusters = |
2472 | percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | 2479 | percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); |
2473 | /* | 2480 | /* |
2474 | * Start pushing delalloc when 1/2 of free blocks are dirty. | 2481 | * Start pushing delalloc when 1/2 of free blocks are dirty. |
2475 | */ | 2482 | */ |
2476 | if (dirty_clusters && (free_clusters < 2 * dirty_clusters)) | 2483 | if (dirty_clusters && (free_clusters < 2 * dirty_clusters)) |
2477 | try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); | 2484 | try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); |
2478 | 2485 | ||
2479 | if (2 * free_clusters < 3 * dirty_clusters || | 2486 | if (2 * free_clusters < 3 * dirty_clusters || |
2480 | free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) { | 2487 | free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) { |
2481 | /* | 2488 | /* |
2482 | * free block count is less than 150% of dirty blocks | 2489 | * free block count is less than 150% of dirty blocks |
2483 | * or free blocks are less than the watermark | 2490 | * or free blocks are less than the watermark |
2484 | */ | 2491 | */ |
2485 | return 1; | 2492 | return 1; |
2486 | } | 2493 | } |
2487 | return 0; | 2494 | return 0; |
2488 | } | 2495 | } |
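
With the numbers below, 2 * free = 5600 falls under 3 * dirty = 6000, so the first condition alone (free below 150% of dirty) forces the fallback. A worked example of both cutoffs; the watermark is assumed to be 1024 clusters for the demo, the real constant being EXT4_FREECLUSTERS_WATERMARK:

        #include <stdio.h>

        int main(void)
        {
                long long free_clusters = 2800, dirty_clusters = 2000;
                long long watermark = 1024;             /* assumed value */

                /* 2 * 2800 = 5600 < 3 * 2000 = 6000: free < 150% of dirty */
                if (2 * free_clusters < 3 * dirty_clusters ||
                    free_clusters < dirty_clusters + watermark)
                        printf("switch to non-delalloc writeback\n");
                else
                        printf("stay in delalloc mode\n");
                return 0;
        }
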
2489 | 2496 | ||
2490 | /* We always reserve for an inode update; the superblock could be there too */ | 2497 | /* We always reserve for an inode update; the superblock could be there too */ |
2491 | static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) | 2498 | static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) |
2492 | { | 2499 | { |
2493 | if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | 2500 | if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, |
2494 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) | 2501 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) |
2495 | return 1; | 2502 | return 1; |
2496 | 2503 | ||
2497 | if (pos + len <= 0x7fffffffULL) | 2504 | if (pos + len <= 0x7fffffffULL) |
2498 | return 1; | 2505 | return 1; |
2499 | 2506 | ||
2500 | /* We might need to update the superblock to set LARGE_FILE */ | 2507 | /* We might need to update the superblock to set LARGE_FILE */ |
2501 | return 2; | 2508 | return 2; |
2502 | } | 2509 | } |
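
The credit count above is one journal block for the inode in the common case, and two when the write may push the file past 2^31 - 1 bytes on a filesystem that has not yet set LARGE_FILE, since the superblock may need the feature bit as well. A standalone sketch of that decision:

        #include <stdbool.h>
        #include <stdio.h>

        static int da_write_credits(bool has_large_file, long long pos, unsigned len)
        {
                if (has_large_file)
                        return 1;
                if (pos + len <= 0x7fffffffLL)
                        return 1;
                return 2;       /* superblock may gain the LARGE_FILE bit */
        }

        int main(void)
        {
                printf("%d\n", da_write_credits(false, 0x7ffffff0LL, 64));  /* 2 */
                printf("%d\n", da_write_credits(true, 0x7ffffff0LL, 64));   /* 1 */
                return 0;
        }
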
2503 | 2510 | ||
2504 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2511 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
2505 | loff_t pos, unsigned len, unsigned flags, | 2512 | loff_t pos, unsigned len, unsigned flags, |
2506 | struct page **pagep, void **fsdata) | 2513 | struct page **pagep, void **fsdata) |
2507 | { | 2514 | { |
2508 | int ret, retries = 0; | 2515 | int ret, retries = 0; |
2509 | struct page *page; | 2516 | struct page *page; |
2510 | pgoff_t index; | 2517 | pgoff_t index; |
2511 | struct inode *inode = mapping->host; | 2518 | struct inode *inode = mapping->host; |
2512 | handle_t *handle; | 2519 | handle_t *handle; |
2513 | 2520 | ||
2514 | index = pos >> PAGE_CACHE_SHIFT; | 2521 | index = pos >> PAGE_CACHE_SHIFT; |
2515 | 2522 | ||
2516 | if (ext4_nonda_switch(inode->i_sb)) { | 2523 | if (ext4_nonda_switch(inode->i_sb)) { |
2517 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 2524 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
2518 | return ext4_write_begin(file, mapping, pos, | 2525 | return ext4_write_begin(file, mapping, pos, |
2519 | len, flags, pagep, fsdata); | 2526 | len, flags, pagep, fsdata); |
2520 | } | 2527 | } |
2521 | *fsdata = (void *)0; | 2528 | *fsdata = (void *)0; |
2522 | trace_ext4_da_write_begin(inode, pos, len, flags); | 2529 | trace_ext4_da_write_begin(inode, pos, len, flags); |
2523 | 2530 | ||
2524 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | 2531 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { |
2525 | ret = ext4_da_write_inline_data_begin(mapping, inode, | 2532 | ret = ext4_da_write_inline_data_begin(mapping, inode, |
2526 | pos, len, flags, | 2533 | pos, len, flags, |
2527 | pagep, fsdata); | 2534 | pagep, fsdata); |
2528 | if (ret < 0) | 2535 | if (ret < 0) |
2529 | return ret; | 2536 | return ret; |
2530 | if (ret == 1) | 2537 | if (ret == 1) |
2531 | return 0; | 2538 | return 0; |
2532 | } | 2539 | } |
2533 | 2540 | ||
2534 | /* | 2541 | /* |
2535 | * grab_cache_page_write_begin() can take a long time if the | 2542 | * grab_cache_page_write_begin() can take a long time if the |
2536 | * system is thrashing due to memory pressure, or if the page | 2543 | * system is thrashing due to memory pressure, or if the page |
2537 | * is being written back. So grab it first before we start | 2544 | * is being written back. So grab it first before we start |
2538 | * the transaction handle. This also allows us to allocate | 2545 | * the transaction handle. This also allows us to allocate |
2539 | * the page (if needed) without using GFP_NOFS. | 2546 | * the page (if needed) without using GFP_NOFS. |
2540 | */ | 2547 | */ |
2541 | retry_grab: | 2548 | retry_grab: |
2542 | page = grab_cache_page_write_begin(mapping, index, flags); | 2549 | page = grab_cache_page_write_begin(mapping, index, flags); |
2543 | if (!page) | 2550 | if (!page) |
2544 | return -ENOMEM; | 2551 | return -ENOMEM; |
2545 | unlock_page(page); | 2552 | unlock_page(page); |
2546 | 2553 | ||
2547 | /* | 2554 | /* |
2548 | * With delayed allocation, we don't log the i_disksize update | 2555 | * With delayed allocation, we don't log the i_disksize update |
2549 | * if there is delayed block allocation. But we still need | 2556 | * if there is delayed block allocation. But we still need |
2550 | * to journal the i_disksize update if a write to the end | 2557 | * to journal the i_disksize update if a write to the end |
2551 | * of the file hits an already mapped buffer. | 2558 | * of the file hits an already mapped buffer. |
2552 | */ | 2559 | */ |
2553 | retry_journal: | 2560 | retry_journal: |
2554 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2561 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, |
2555 | ext4_da_write_credits(inode, pos, len)); | 2562 | ext4_da_write_credits(inode, pos, len)); |
2556 | if (IS_ERR(handle)) { | 2563 | if (IS_ERR(handle)) { |
2557 | page_cache_release(page); | 2564 | page_cache_release(page); |
2558 | return PTR_ERR(handle); | 2565 | return PTR_ERR(handle); |
2559 | } | 2566 | } |
2560 | 2567 | ||
2561 | lock_page(page); | 2568 | lock_page(page); |
2562 | if (page->mapping != mapping) { | 2569 | if (page->mapping != mapping) { |
2563 | /* The page got truncated from under us */ | 2570 | /* The page got truncated from under us */ |
2564 | unlock_page(page); | 2571 | unlock_page(page); |
2565 | page_cache_release(page); | 2572 | page_cache_release(page); |
2566 | ext4_journal_stop(handle); | 2573 | ext4_journal_stop(handle); |
2567 | goto retry_grab; | 2574 | goto retry_grab; |
2568 | } | 2575 | } |
2569 | /* In case writeback began while the page was unlocked */ | 2576 | /* In case writeback began while the page was unlocked */ |
2570 | wait_for_stable_page(page); | 2577 | wait_for_stable_page(page); |
2571 | 2578 | ||
2572 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); | 2579 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
2573 | if (ret < 0) { | 2580 | if (ret < 0) { |
2574 | unlock_page(page); | 2581 | unlock_page(page); |
2575 | ext4_journal_stop(handle); | 2582 | ext4_journal_stop(handle); |
2576 | /* | 2583 | /* |
2577 | * block_write_begin may have instantiated a few blocks | 2584 | * block_write_begin may have instantiated a few blocks |
2578 | * outside i_size. Trim these off again. Don't need | 2585 | * outside i_size. Trim these off again. Don't need |
2579 | * i_size_read because we hold i_mutex. | 2586 | * i_size_read because we hold i_mutex. |
2580 | */ | 2587 | */ |
2581 | if (pos + len > inode->i_size) | 2588 | if (pos + len > inode->i_size) |
2582 | ext4_truncate_failed_write(inode); | 2589 | ext4_truncate_failed_write(inode); |
2583 | 2590 | ||
2584 | if (ret == -ENOSPC && | 2591 | if (ret == -ENOSPC && |
2585 | ext4_should_retry_alloc(inode->i_sb, &retries)) | 2592 | ext4_should_retry_alloc(inode->i_sb, &retries)) |
2586 | goto retry_journal; | 2593 | goto retry_journal; |
2587 | 2594 | ||
2588 | page_cache_release(page); | 2595 | page_cache_release(page); |
2589 | return ret; | 2596 | return ret; |
2590 | } | 2597 | } |
2591 | 2598 | ||
2592 | *pagep = page; | 2599 | *pagep = page; |
2593 | return ret; | 2600 | return ret; |
2594 | } | 2601 | } |
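
The retry_grab/retry_journal structure above is a lock-ordering pattern: grab the page (which can block) before starting the handle, drop its lock across the journal start, then relock and revalidate page->mapping, retrying from scratch if truncate won the race. A toy model of that dance, with stub structs in place of real pages and handles:

        #include <stdbool.h>
        #include <stdio.h>

        struct page { const void *mapping; };

        static bool page_truncated;             /* simulated racing truncate */

        static void lock_page(struct page *p)   { (void)p; }
        static void unlock_page(struct page *p) { (void)p; }

        static int write_begin(struct page *p, const void *mapping)
        {
        retry_grab:
                lock_page(p);
                unlock_page(p);         /* don't hold it over journal start */

                /* a transaction handle would be started here (may block) */

                lock_page(p);
                if (page_truncated)
                        p->mapping = NULL;      /* inject the race once */
                if (p->mapping != mapping) {    /* truncated from under us */
                        unlock_page(p);
                        page_truncated = false;
                        p->mapping = mapping;   /* page re-created on retry */
                        goto retry_grab;
                }
                return 0;
        }

        int main(void)
        {
                static const int inode_mapping;
                struct page p = { &inode_mapping };

                page_truncated = true;
                printf("write_begin -> %d\n", write_begin(&p, &inode_mapping));
                return 0;
        }
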
2595 | 2602 | ||
2596 | /* | 2603 | /* |
2597 | * Check if we should update i_disksize | 2604 | * Check if we should update i_disksize |
2598 | * when a write to the end of the file does not require block allocation | 2605 | * when a write to the end of the file does not require block allocation |
2599 | */ | 2606 | */ |
2600 | static int ext4_da_should_update_i_disksize(struct page *page, | 2607 | static int ext4_da_should_update_i_disksize(struct page *page, |
2601 | unsigned long offset) | 2608 | unsigned long offset) |
2602 | { | 2609 | { |
2603 | struct buffer_head *bh; | 2610 | struct buffer_head *bh; |
2604 | struct inode *inode = page->mapping->host; | 2611 | struct inode *inode = page->mapping->host; |
2605 | unsigned int idx; | 2612 | unsigned int idx; |
2606 | int i; | 2613 | int i; |
2607 | 2614 | ||
2608 | bh = page_buffers(page); | 2615 | bh = page_buffers(page); |
2609 | idx = offset >> inode->i_blkbits; | 2616 | idx = offset >> inode->i_blkbits; |
2610 | 2617 | ||
2611 | for (i = 0; i < idx; i++) | 2618 | for (i = 0; i < idx; i++) |
2612 | bh = bh->b_this_page; | 2619 | bh = bh->b_this_page; |
2613 | 2620 | ||
2614 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) | 2621 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) |
2615 | return 0; | 2622 | return 0; |
2616 | return 1; | 2623 | return 1; |
2617 | } | 2624 | } |
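
The helper walks the page's buffer list to the buffer covering the given offset and answers whether it is mapped and neither delayed nor unwritten, i.e. whether i_disksize can be bumped without block allocation. A toy version with a plain linked list and 1 KiB buffers in a 4 KiB page (buffer states invented for the demo):

        #include <stdbool.h>
        #include <stdio.h>

        struct buf {
                bool mapped, delay, unwritten;
                struct buf *next;
        };

        static bool should_update_disksize(struct buf *head, unsigned offset,
                                           unsigned blkbits)
        {
                unsigned idx = offset >> blkbits;   /* buffer covering offset */
                struct buf *bh = head;

                while (idx--)
                        bh = bh->next;
                return bh->mapped && !bh->delay && !bh->unwritten;
        }

        int main(void)
        {
                /* 4 buffers of 1 KiB in a 4 KiB page; only the first is mapped */
                struct buf b3 = { false, true,  false, NULL };
                struct buf b2 = { false, true,  false, &b3 };
                struct buf b1 = { false, false, true,  &b2 };
                struct buf b0 = { true,  false, false, &b1 };

                printf("%d\n", should_update_disksize(&b0, 100,  10));  /* 1 */
                printf("%d\n", should_update_disksize(&b0, 2100, 10));  /* 0 */
                return 0;
        }
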
2618 | 2625 | ||
2619 | static int ext4_da_write_end(struct file *file, | 2626 | static int ext4_da_write_end(struct file *file, |
2620 | struct address_space *mapping, | 2627 | struct address_space *mapping, |
2621 | loff_t pos, unsigned len, unsigned copied, | 2628 | loff_t pos, unsigned len, unsigned copied, |
2622 | struct page *page, void *fsdata) | 2629 | struct page *page, void *fsdata) |
2623 | { | 2630 | { |
2624 | struct inode *inode = mapping->host; | 2631 | struct inode *inode = mapping->host; |
2625 | int ret = 0, ret2; | 2632 | int ret = 0, ret2; |
2626 | handle_t *handle = ext4_journal_current_handle(); | 2633 | handle_t *handle = ext4_journal_current_handle(); |
2627 | loff_t new_i_size; | 2634 | loff_t new_i_size; |
2628 | unsigned long start, end; | 2635 | unsigned long start, end; |
2629 | int write_mode = (int)(unsigned long)fsdata; | 2636 | int write_mode = (int)(unsigned long)fsdata; |
2630 | 2637 | ||
2631 | if (write_mode == FALL_BACK_TO_NONDELALLOC) | 2638 | if (write_mode == FALL_BACK_TO_NONDELALLOC) |
2632 | return ext4_write_end(file, mapping, pos, | 2639 | return ext4_write_end(file, mapping, pos, |
2633 | len, copied, page, fsdata); | 2640 | len, copied, page, fsdata); |
2634 | 2641 | ||
2635 | trace_ext4_da_write_end(inode, pos, len, copied); | 2642 | trace_ext4_da_write_end(inode, pos, len, copied); |
2636 | start = pos & (PAGE_CACHE_SIZE - 1); | 2643 | start = pos & (PAGE_CACHE_SIZE - 1); |
2637 | end = start + copied - 1; | 2644 | end = start + copied - 1; |
2638 | 2645 | ||
2639 | /* | 2646 | /* |
2640 | * generic_write_end() will run mark_inode_dirty() if i_size | 2647 | * generic_write_end() will run mark_inode_dirty() if i_size |
2641 | * changes. So let's piggyback the i_disksize mark_inode_dirty | 2648 | * changes. So let's piggyback the i_disksize mark_inode_dirty |
2642 | * into that. | 2649 | * into that. |
2643 | */ | 2650 | */ |
2644 | new_i_size = pos + copied; | 2651 | new_i_size = pos + copied; |
2645 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { | 2652 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { |
2646 | if (ext4_has_inline_data(inode) || | 2653 | if (ext4_has_inline_data(inode) || |
2647 | ext4_da_should_update_i_disksize(page, end)) { | 2654 | ext4_da_should_update_i_disksize(page, end)) { |
2648 | ext4_update_i_disksize(inode, new_i_size); | 2655 | ext4_update_i_disksize(inode, new_i_size); |
2649 | /* We need to mark inode dirty even if | 2656 | /* We need to mark inode dirty even if |
2650 | * new_i_size is less than inode->i_size | 2657 | * new_i_size is less than inode->i_size |
2651 | * but greater than i_disksize (hint: delalloc) | 2658 | * but greater than i_disksize (hint: delalloc) |
2652 | */ | 2659 | */ |
2653 | ext4_mark_inode_dirty(handle, inode); | 2660 | ext4_mark_inode_dirty(handle, inode); |
2654 | } | 2661 | } |
2655 | } | 2662 | } |
2656 | 2663 | ||
2657 | if (write_mode != CONVERT_INLINE_DATA && | 2664 | if (write_mode != CONVERT_INLINE_DATA && |
2658 | ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && | 2665 | ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && |
2659 | ext4_has_inline_data(inode)) | 2666 | ext4_has_inline_data(inode)) |
2660 | ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, | 2667 | ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, |
2661 | page); | 2668 | page); |
2662 | else | 2669 | else |
2663 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2670 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
2664 | page, fsdata); | 2671 | page, fsdata); |
2665 | 2672 | ||
2666 | copied = ret2; | 2673 | copied = ret2; |
2667 | if (ret2 < 0) | 2674 | if (ret2 < 0) |
2668 | ret = ret2; | 2675 | ret = ret2; |
2669 | ret2 = ext4_journal_stop(handle); | 2676 | ret2 = ext4_journal_stop(handle); |
2670 | if (!ret) | 2677 | if (!ret) |
2671 | ret = ret2; | 2678 | ret = ret2; |
2672 | 2679 | ||
2673 | return ret ? ret : copied; | 2680 | return ret ? ret : copied; |
2674 | } | 2681 | } |
2675 | 2682 | ||
2676 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, | 2683 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, |
2677 | unsigned int length) | 2684 | unsigned int length) |
2678 | { | 2685 | { |
2679 | /* | 2686 | /* |
2680 | * Drop reserved blocks | 2687 | * Drop reserved blocks |
2681 | */ | 2688 | */ |
2682 | BUG_ON(!PageLocked(page)); | 2689 | BUG_ON(!PageLocked(page)); |
2683 | if (!page_has_buffers(page)) | 2690 | if (!page_has_buffers(page)) |
2684 | goto out; | 2691 | goto out; |
2685 | 2692 | ||
2686 | ext4_da_page_release_reservation(page, offset, length); | 2693 | ext4_da_page_release_reservation(page, offset, length); |
2687 | 2694 | ||
2688 | out: | 2695 | out: |
2689 | ext4_invalidatepage(page, offset, length); | 2696 | ext4_invalidatepage(page, offset, length); |
2690 | 2697 | ||
2691 | return; | 2698 | return; |
2692 | } | 2699 | } |
2693 | 2700 | ||
2694 | /* | 2701 | /* |
2695 | * Force all delayed allocation blocks to be allocated for a given inode. | 2702 | * Force all delayed allocation blocks to be allocated for a given inode. |
2696 | */ | 2703 | */ |
2697 | int ext4_alloc_da_blocks(struct inode *inode) | 2704 | int ext4_alloc_da_blocks(struct inode *inode) |
2698 | { | 2705 | { |
2699 | trace_ext4_alloc_da_blocks(inode); | 2706 | trace_ext4_alloc_da_blocks(inode); |
2700 | 2707 | ||
2701 | if (!EXT4_I(inode)->i_reserved_data_blocks) | 2708 | if (!EXT4_I(inode)->i_reserved_data_blocks) |
2702 | return 0; | 2709 | return 0; |
2703 | 2710 | ||
2704 | /* | 2711 | /* |
2705 | * We do something simple for now. The filemap_flush() will | 2712 | * We do something simple for now. The filemap_flush() will |
2706 | * also start triggering a write of the data blocks, which is | 2713 | * also start triggering a write of the data blocks, which is |
2707 | * not strictly speaking necessary (and for users of | 2714 | * not strictly speaking necessary (and for users of |
2708 | * laptop_mode, not even desirable). However, to do otherwise | 2715 | * laptop_mode, not even desirable). However, to do otherwise |
2709 | * would require replicating code paths in: | 2716 | * would require replicating code paths in: |
2710 | * | 2717 | * |
2711 | * ext4_writepages() -> | 2718 | * ext4_writepages() -> |
2712 | * write_cache_pages() ---> (via passed in callback function) | 2719 | * write_cache_pages() ---> (via passed in callback function) |
2713 | * __mpage_da_writepage() --> | 2720 | * __mpage_da_writepage() --> |
2714 | * mpage_add_bh_to_extent() | 2721 | * mpage_add_bh_to_extent() |
2715 | * mpage_da_map_blocks() | 2722 | * mpage_da_map_blocks() |
2716 | * | 2723 | * |
2717 | * The problem is that write_cache_pages(), located in | 2724 | * The problem is that write_cache_pages(), located in |
2718 | * mm/page-writeback.c, marks pages clean in preparation for | 2725 | * mm/page-writeback.c, marks pages clean in preparation for |
2719 | * doing I/O, which is not desirable if we're not planning on | 2726 | * doing I/O, which is not desirable if we're not planning on |
2720 | * doing I/O at all. | 2727 | * doing I/O at all. |
2721 | * | 2728 | * |
2722 | * We could call write_cache_pages(), and then redirty all of | 2729 | * We could call write_cache_pages(), and then redirty all of |
2723 | * the pages by calling redirty_page_for_writepage() but that | 2730 | * the pages by calling redirty_page_for_writepage() but that |
2724 | * would be ugly in the extreme. So instead we would need to | 2731 | * would be ugly in the extreme. So instead we would need to |
2725 | * replicate parts of the code in the above functions, | 2732 | * replicate parts of the code in the above functions, |
2726 | * simplifying them because we wouldn't actually intend to | 2733 | * simplifying them because we wouldn't actually intend to |
2727 | * write out the pages, but rather only collect contiguous | 2734 | * write out the pages, but rather only collect contiguous |
2728 | * logical block extents, call the multi-block allocator, and | 2735 | * logical block extents, call the multi-block allocator, and |
2729 | * then update the buffer heads with the block allocations. | 2736 | * then update the buffer heads with the block allocations. |
2730 | * | 2737 | * |
2731 | * For now, though, we'll cheat by calling filemap_flush(), | 2738 | * For now, though, we'll cheat by calling filemap_flush(), |
2732 | * which will map the blocks, and start the I/O, but not | 2739 | * which will map the blocks, and start the I/O, but not |
2733 | * actually wait for the I/O to complete. | 2740 | * actually wait for the I/O to complete. |
2734 | */ | 2741 | */ |
2735 | return filemap_flush(inode->i_mapping); | 2742 | return filemap_flush(inode->i_mapping); |
2736 | } | 2743 | } |
2737 | 2744 | ||
2738 | /* | 2745 | /* |
2739 | * bmap() is special. It gets used by applications such as lilo and by | 2746 | * bmap() is special. It gets used by applications such as lilo and by |
2740 | * the swapper to find the on-disk block of a specific piece of data. | 2747 | * the swapper to find the on-disk block of a specific piece of data. |
2741 | * | 2748 | * |
2742 | * Naturally, this is dangerous if the block concerned is still in the | 2749 | * Naturally, this is dangerous if the block concerned is still in the |
2743 | * journal. If somebody makes a swapfile on an ext4 data-journaling | 2750 | * journal. If somebody makes a swapfile on an ext4 data-journaling |
2744 | * filesystem and enables swap, then they may get a nasty shock when the | 2751 | * filesystem and enables swap, then they may get a nasty shock when the |
2745 | * data getting swapped to that swapfile suddenly gets overwritten by | 2752 | * data getting swapped to that swapfile suddenly gets overwritten by |
2746 | * the original zeros written out previously to the journal and | 2753 | * the original zeros written out previously to the journal and |
2747 | * awaiting writeback in the kernel's buffer cache. | 2754 | * awaiting writeback in the kernel's buffer cache. |
2748 | * | 2755 | * |
2749 | * So, if we see any bmap calls here on a modified, data-journaled file, | 2756 | * So, if we see any bmap calls here on a modified, data-journaled file, |
2750 | * take extra steps to flush any blocks which might be in the cache. | 2757 | * take extra steps to flush any blocks which might be in the cache. |
2751 | */ | 2758 | */ |
2752 | static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | 2759 | static sector_t ext4_bmap(struct address_space *mapping, sector_t block) |
2753 | { | 2760 | { |
2754 | struct inode *inode = mapping->host; | 2761 | struct inode *inode = mapping->host; |
2755 | journal_t *journal; | 2762 | journal_t *journal; |
2756 | int err; | 2763 | int err; |
2757 | 2764 | ||
2758 | /* | 2765 | /* |
2759 | * We can get here for an inline file via the FIBMAP ioctl | 2766 | * We can get here for an inline file via the FIBMAP ioctl |
2760 | */ | 2767 | */ |
2761 | if (ext4_has_inline_data(inode)) | 2768 | if (ext4_has_inline_data(inode)) |
2762 | return 0; | 2769 | return 0; |
2763 | 2770 | ||
2764 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | 2771 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && |
2765 | test_opt(inode->i_sb, DELALLOC)) { | 2772 | test_opt(inode->i_sb, DELALLOC)) { |
2766 | /* | 2773 | /* |
2767 | * With delalloc we want to sync the file | 2774 | * With delalloc we want to sync the file |
2768 | * so that we can make sure we allocate | 2775 | * so that we can make sure we allocate |
2769 | * blocks for the file | 2776 | * blocks for the file |
2770 | */ | 2777 | */ |
2771 | filemap_write_and_wait(mapping); | 2778 | filemap_write_and_wait(mapping); |
2772 | } | 2779 | } |
2773 | 2780 | ||
2774 | if (EXT4_JOURNAL(inode) && | 2781 | if (EXT4_JOURNAL(inode) && |
2775 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { | 2782 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { |
2776 | /* | 2783 | /* |
2777 | * This is a REALLY heavyweight approach, but the use of | 2784 | * This is a REALLY heavyweight approach, but the use of |
2778 | * bmap on dirty files is expected to be extremely rare: | 2785 | * bmap on dirty files is expected to be extremely rare: |
2779 | * only if we run lilo or swapon on a freshly made file | 2786 | * only if we run lilo or swapon on a freshly made file |
2780 | * do we expect this to happen. | 2787 | * do we expect this to happen. |
2781 | * | 2788 | * |
2782 | * (bmap requires CAP_SYS_RAWIO so this does not | 2789 | * (bmap requires CAP_SYS_RAWIO so this does not |
2783 | * represent an unprivileged user DOS attack --- we'd be | 2790 | * represent an unprivileged user DOS attack --- we'd be |
2784 | * in trouble if mortal users could trigger this path at | 2791 | * in trouble if mortal users could trigger this path at |
2785 | * will.) | 2792 | * will.) |
2786 | * | 2793 | * |
2787 | * NB. EXT4_STATE_JDATA is not set on files other than | 2794 | * NB. EXT4_STATE_JDATA is not set on files other than |
2788 | * regular files. If somebody wants to bmap a directory | 2795 | * regular files. If somebody wants to bmap a directory |
2789 | * or symlink and gets confused because the buffer | 2796 | * or symlink and gets confused because the buffer |
2790 | * hasn't yet been flushed to disk, they deserve | 2797 | * hasn't yet been flushed to disk, they deserve |
2791 | * everything they get. | 2798 | * everything they get. |
2792 | */ | 2799 | */ |
2793 | 2800 | ||
2794 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); | 2801 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); |
2795 | journal = EXT4_JOURNAL(inode); | 2802 | journal = EXT4_JOURNAL(inode); |
2796 | jbd2_journal_lock_updates(journal); | 2803 | jbd2_journal_lock_updates(journal); |
2797 | err = jbd2_journal_flush(journal); | 2804 | err = jbd2_journal_flush(journal); |
2798 | jbd2_journal_unlock_updates(journal); | 2805 | jbd2_journal_unlock_updates(journal); |
2799 | 2806 | ||
2800 | if (err) | 2807 | if (err) |
2801 | return 0; | 2808 | return 0; |
2802 | } | 2809 | } |
2803 | 2810 | ||
2804 | return generic_block_bmap(mapping, block, ext4_get_block); | 2811 | return generic_block_bmap(mapping, block, ext4_get_block); |
2805 | } | 2812 | } |
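
From userspace, this path is reached through the FIBMAP ioctl that the comment's lilo/swapon examples rely on. A hedged sketch of querying the physical block behind logical block 0 of a file; it needs CAP_SYS_RAWIO, prints 0 for unmapped or inline-data blocks, and keeps error handling minimal:

        #include <fcntl.h>
        #include <linux/fs.h>           /* FIBMAP */
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(int argc, char **argv)
        {
                int fd, block = 0;      /* logical block 0; FIBMAP uses an int */

                if (argc != 2) {
                        fprintf(stderr, "usage: %s <file>\n", argv[0]);
                        return 1;
                }
                fd = open(argv[1], O_RDONLY);
                if (fd < 0 || ioctl(fd, FIBMAP, &block) < 0) {
                        perror("FIBMAP");
                        return 1;
                }
                printf("logical block 0 -> physical block %d\n", block);
                close(fd);
                return 0;
        }
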
2806 | 2813 | ||
2807 | static int ext4_readpage(struct file *file, struct page *page) | 2814 | static int ext4_readpage(struct file *file, struct page *page) |
2808 | { | 2815 | { |
2809 | int ret = -EAGAIN; | 2816 | int ret = -EAGAIN; |
2810 | struct inode *inode = page->mapping->host; | 2817 | struct inode *inode = page->mapping->host; |
2811 | 2818 | ||
2812 | trace_ext4_readpage(page); | 2819 | trace_ext4_readpage(page); |
2813 | 2820 | ||
2814 | if (ext4_has_inline_data(inode)) | 2821 | if (ext4_has_inline_data(inode)) |
2815 | ret = ext4_readpage_inline(inode, page); | 2822 | ret = ext4_readpage_inline(inode, page); |
2816 | 2823 | ||
2817 | if (ret == -EAGAIN) | 2824 | if (ret == -EAGAIN) |
2818 | return mpage_readpage(page, ext4_get_block); | 2825 | return mpage_readpage(page, ext4_get_block); |
2819 | 2826 | ||
2820 | return ret; | 2827 | return ret; |
2821 | } | 2828 | } |
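
The -EAGAIN convention here lets the inline-data handler decline a page it does not own, with the caller falling through to the generic block-mapped reader. A minimal model of that dispatch idiom, with stand-in handlers:

        #include <errno.h>
        #include <stdbool.h>
        #include <stdio.h>

        static int read_inline(bool has_inline)
        {
                return has_inline ? 0 : -EAGAIN;  /* decline if no inline data */
        }

        static int read_generic(void)
        {
                return 0;                         /* generic block-mapped read */
        }

        static int readpage(bool has_inline)
        {
                int ret = -EAGAIN;

                if (has_inline)
                        ret = read_inline(has_inline);
                if (ret == -EAGAIN)               /* handler declined: fall back */
                        ret = read_generic();
                return ret;
        }

        int main(void)
        {
                printf("%d %d\n", readpage(true), readpage(false));  /* 0 0 */
                return 0;
        }
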
2822 | 2829 | ||
2823 | static int | 2830 | static int |
2824 | ext4_readpages(struct file *file, struct address_space *mapping, | 2831 | ext4_readpages(struct file *file, struct address_space *mapping, |
2825 | struct list_head *pages, unsigned nr_pages) | 2832 | struct list_head *pages, unsigned nr_pages) |
2826 | { | 2833 | { |
2827 | struct inode *inode = mapping->host; | 2834 | struct inode *inode = mapping->host; |
2828 | 2835 | ||
2829 | /* If the file has inline data, no need to do readpages. */ | 2836 | /* If the file has inline data, no need to do readpages. */ |
2830 | if (ext4_has_inline_data(inode)) | 2837 | if (ext4_has_inline_data(inode)) |
2831 | return 0; | 2838 | return 0; |
2832 | 2839 | ||
2833 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2840 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
2834 | } | 2841 | } |
2835 | 2842 | ||
2836 | static void ext4_invalidatepage(struct page *page, unsigned int offset, | 2843 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
2837 | unsigned int length) | 2844 | unsigned int length) |
2838 | { | 2845 | { |
2839 | trace_ext4_invalidatepage(page, offset, length); | 2846 | trace_ext4_invalidatepage(page, offset, length); |
2840 | 2847 | ||
2841 | /* No journalling happens on data buffers when this function is used */ | 2848 | /* No journalling happens on data buffers when this function is used */ |
2842 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); | 2849 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); |
2843 | 2850 | ||
2844 | block_invalidatepage(page, offset, length); | 2851 | block_invalidatepage(page, offset, length); |
2845 | } | 2852 | } |
2846 | 2853 | ||
2847 | static int __ext4_journalled_invalidatepage(struct page *page, | 2854 | static int __ext4_journalled_invalidatepage(struct page *page, |
2848 | unsigned int offset, | 2855 | unsigned int offset, |
2849 | unsigned int length) | 2856 | unsigned int length) |
2850 | { | 2857 | { |
2851 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2858 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
2852 | 2859 | ||
2853 | trace_ext4_journalled_invalidatepage(page, offset, length); | 2860 | trace_ext4_journalled_invalidatepage(page, offset, length); |
2854 | 2861 | ||
2855 | /* | 2862 | /* |
2856 | * If it's a full truncate we just forget about the pending dirtying | 2863 | * If it's a full truncate we just forget about the pending dirtying |
2857 | */ | 2864 | */ |
2858 | if (offset == 0 && length == PAGE_CACHE_SIZE) | 2865 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
2859 | ClearPageChecked(page); | 2866 | ClearPageChecked(page); |
2860 | 2867 | ||
2861 | return jbd2_journal_invalidatepage(journal, page, offset, length); | 2868 | return jbd2_journal_invalidatepage(journal, page, offset, length); |
2862 | } | 2869 | } |
2863 | 2870 | ||
2864 | /* Wrapper for aops... */ | 2871 | /* Wrapper for aops... */ |
2865 | static void ext4_journalled_invalidatepage(struct page *page, | 2872 | static void ext4_journalled_invalidatepage(struct page *page, |
2866 | unsigned int offset, | 2873 | unsigned int offset, |
2867 | unsigned int length) | 2874 | unsigned int length) |
2868 | { | 2875 | { |
2869 | WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); | 2876 | WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); |
2870 | } | 2877 | } |
2871 | 2878 | ||
2872 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2879 | static int ext4_releasepage(struct page *page, gfp_t wait) |
2873 | { | 2880 | { |
2874 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2881 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
2875 | 2882 | ||
2876 | trace_ext4_releasepage(page); | 2883 | trace_ext4_releasepage(page); |
2877 | 2884 | ||
2878 | /* Page has dirty journalled data -> cannot release */ | 2885 | /* Page has dirty journalled data -> cannot release */ |
2879 | if (PageChecked(page)) | 2886 | if (PageChecked(page)) |
2880 | return 0; | 2887 | return 0; |
2881 | if (journal) | 2888 | if (journal) |
2882 | return jbd2_journal_try_to_free_buffers(journal, page, wait); | 2889 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
2883 | else | 2890 | else |
2884 | return try_to_free_buffers(page); | 2891 | return try_to_free_buffers(page); |
2885 | } | 2892 | } |
2886 | 2893 | ||
2887 | /* | 2894 | /* |
2888 | * ext4_get_block used when preparing for a DIO write or buffer write. | 2895 | * ext4_get_block used when preparing for a DIO write or buffer write. |
2889 | * We allocate an uninitialized extent if blocks haven't been allocated. | 2896 | * We allocate an uninitialized extent if blocks haven't been allocated. |
2890 | * The extent will be converted to initialized after the IO is complete. | 2897 | * The extent will be converted to initialized after the IO is complete. |
2891 | */ | 2898 | */ |
2892 | int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2899 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
2893 | struct buffer_head *bh_result, int create) | 2900 | struct buffer_head *bh_result, int create) |
2894 | { | 2901 | { |
2895 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 2902 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
2896 | inode->i_ino, create); | 2903 | inode->i_ino, create); |
2897 | return _ext4_get_block(inode, iblock, bh_result, | 2904 | return _ext4_get_block(inode, iblock, bh_result, |
2898 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 2905 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
2899 | } | 2906 | } |
2900 | 2907 | ||
2901 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | 2908 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, |
2902 | struct buffer_head *bh_result, int create) | 2909 | struct buffer_head *bh_result, int create) |
2903 | { | 2910 | { |
2904 | ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", | 2911 | ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", |
2905 | inode->i_ino, create); | 2912 | inode->i_ino, create); |
2906 | return _ext4_get_block(inode, iblock, bh_result, | 2913 | return _ext4_get_block(inode, iblock, bh_result, |
2907 | EXT4_GET_BLOCKS_NO_LOCK); | 2914 | EXT4_GET_BLOCKS_NO_LOCK); |
2908 | } | 2915 | } |
2909 | 2916 | ||
2910 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 2917 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
2911 | ssize_t size, void *private) | 2918 | ssize_t size, void *private) |
2912 | { | 2919 | { |
2913 | ext4_io_end_t *io_end = iocb->private; | 2920 | ext4_io_end_t *io_end = iocb->private; |
2914 | 2921 | ||
2915 | /* if not async direct IO just return */ | 2922 | /* if not async direct IO just return */ |
2916 | if (!io_end) | 2923 | if (!io_end) |
2917 | return; | 2924 | return; |
2918 | 2925 | ||
2919 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 2926 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
2920 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", | 2927 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
2921 | iocb->private, io_end->inode->i_ino, iocb, offset, | 2928 | iocb->private, io_end->inode->i_ino, iocb, offset, |
2922 | size); | 2929 | size); |
2923 | 2930 | ||
2924 | iocb->private = NULL; | 2931 | iocb->private = NULL; |
2925 | io_end->offset = offset; | 2932 | io_end->offset = offset; |
2926 | io_end->size = size; | 2933 | io_end->size = size; |
2927 | ext4_put_io_end(io_end); | 2934 | ext4_put_io_end(io_end); |
2928 | } | 2935 | } |
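The io_end lifetime above is reference counted: before submission the iocb is handed its own reference (see ext4_get_io_end() below), ext4_end_io_dio() records the completed range and drops that reference, and whichever put runs last performs the deferred extent conversion. A minimal userspace sketch of the pattern, with illustrative names and a printf standing in for the conversion:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct io_end_stub {
	atomic_int refcount;
	long long offset;
	long long size;
};

static void io_end_put(struct io_end_stub *io)
{
	/* the last put performs the deferred work */
	if (atomic_fetch_sub(&io->refcount, 1) == 1) {
		printf("convert unwritten extents: off=%lld len=%lld\n",
		       io->offset, io->size);
		free(io);
	}
}

static void end_io_dio(struct io_end_stub *io, long long offset,
		       long long size)
{
	io->offset = offset;
	io->size = size;
	io_end_put(io);		/* drop the iocb's reference */
}

int main(void)
{
	struct io_end_stub *io = malloc(sizeof(*io));

	atomic_init(&io->refcount, 1);		/* submitter's reference */
	atomic_fetch_add(&io->refcount, 1);	/* reference for the iocb */
	end_io_dio(io, 4096, 8192);		/* completion fires */
	io_end_put(io);				/* submitter's put frees it */
	return 0;
}

Because only the last put triggers the conversion, it does not matter whether the completion or the submitter's cleanup runs first, which is exactly the property the comment after __blockdev_direct_IO() relies on.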
2929 | 2936 | ||
2930 | /* | 2937 | /* |
2931 | * For ext4 extent files, ext4 will do direct-IO writes to holes, | 2938 | * For ext4 extent files, ext4 will do direct-IO writes to holes, |
2932 | * preallocated extents, and writes that extend the file, with no need to | 2939 | * preallocated extents, and writes that extend the file, with no need to |
2933 | * fall back to buffered IO. | 2940 | * fall back to buffered IO. |
2934 | * | 2941 | * |
2935 | * For holes, we fallocate those blocks and mark them as unwritten. | 2942 | * For holes, we fallocate those blocks and mark them as unwritten. |
2936 | * If those blocks were preallocated, we make sure they are split, but | 2943 | * If those blocks were preallocated, we make sure they are split, but |
2937 | * still keep the range to write as unwritten. | 2944 | * still keep the range to write as unwritten. |
2938 | * | 2945 | * |
2939 | * The unwritten extents will be converted to written when DIO is completed. | 2946 | * The unwritten extents will be converted to written when DIO is completed. |
2940 | * For async direct IO, since the IO may still be pending when we return, we | 2947 | * For async direct IO, since the IO may still be pending when we return, we |
2941 | * set up an end_io callback function, which will do the conversion | 2948 | * set up an end_io callback function, which will do the conversion |
2942 | * when the async direct IO has completed. | 2949 | * when the async direct IO has completed. |
2943 | * | 2950 | * |
2944 | * If the O_DIRECT write will extend the file then add this inode to the | 2951 | * If the O_DIRECT write will extend the file then add this inode to the |
2945 | * orphan list, so that recovery will truncate it back to the original size | 2952 | * orphan list, so that recovery will truncate it back to the original size |
2946 | * if the machine crashes during the write. | 2953 | * if the machine crashes during the write. |
2947 | * | 2954 | * |
2948 | */ | 2955 | */ |
2949 | static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | 2956 | static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, |
2950 | struct iov_iter *iter, loff_t offset) | 2957 | struct iov_iter *iter, loff_t offset) |
2951 | { | 2958 | { |
2952 | struct file *file = iocb->ki_filp; | 2959 | struct file *file = iocb->ki_filp; |
2953 | struct inode *inode = file->f_mapping->host; | 2960 | struct inode *inode = file->f_mapping->host; |
2954 | ssize_t ret; | 2961 | ssize_t ret; |
2955 | size_t count = iov_iter_count(iter); | 2962 | size_t count = iov_iter_count(iter); |
2956 | int overwrite = 0; | 2963 | int overwrite = 0; |
2957 | get_block_t *get_block_func = NULL; | 2964 | get_block_t *get_block_func = NULL; |
2958 | int dio_flags = 0; | 2965 | int dio_flags = 0; |
2959 | loff_t final_size = offset + count; | 2966 | loff_t final_size = offset + count; |
2960 | ext4_io_end_t *io_end = NULL; | 2967 | ext4_io_end_t *io_end = NULL; |
2961 | 2968 | ||
2962 | /* Use the old path for reads and writes beyond i_size. */ | 2969 | /* Use the old path for reads and writes beyond i_size. */ |
2963 | if (rw != WRITE || final_size > inode->i_size) | 2970 | if (rw != WRITE || final_size > inode->i_size) |
2964 | return ext4_ind_direct_IO(rw, iocb, iter, offset); | 2971 | return ext4_ind_direct_IO(rw, iocb, iter, offset); |
2965 | 2972 | ||
2966 | BUG_ON(iocb->private == NULL); | 2973 | BUG_ON(iocb->private == NULL); |
2967 | 2974 | ||
2968 | /* | 2975 | /* |
2969 | * Make all waiters for direct IO properly wait also for extent | 2976 | * Make all waiters for direct IO properly wait also for extent |
2970 | * conversion. This also disallows a race between truncate() and | 2977 | * conversion. This also disallows a race between truncate() and |
2971 | * overwrite DIO, as i_dio_count needs to be incremented under i_mutex. | 2978 | * overwrite DIO, as i_dio_count needs to be incremented under i_mutex. |
2972 | */ | 2979 | */ |
2973 | if (rw == WRITE) | 2980 | if (rw == WRITE) |
2974 | atomic_inc(&inode->i_dio_count); | 2981 | atomic_inc(&inode->i_dio_count); |
2975 | 2982 | ||
2976 | /* If we do an overwrite DIO, i_mutex locking can be released */ | 2983 | /* If we do an overwrite DIO, i_mutex locking can be released */ |
2977 | overwrite = *((int *)iocb->private); | 2984 | overwrite = *((int *)iocb->private); |
2978 | 2985 | ||
2979 | if (overwrite) { | 2986 | if (overwrite) { |
2980 | down_read(&EXT4_I(inode)->i_data_sem); | 2987 | down_read(&EXT4_I(inode)->i_data_sem); |
2981 | mutex_unlock(&inode->i_mutex); | 2988 | mutex_unlock(&inode->i_mutex); |
2982 | } | 2989 | } |
2983 | 2990 | ||
2984 | /* | 2991 | /* |
2985 | * We can do direct writes to holes and fallocated extents. | 2992 | * We can do direct writes to holes and fallocated extents. |
2986 | * | 2993 | * |
2987 | * Allocated blocks to fill the hole are marked as | 2994 | * Allocated blocks to fill the hole are marked as |
2988 | * unwritten to prevent a parallel buffered read from exposing | 2995 | * unwritten to prevent a parallel buffered read from exposing |
2989 | * stale data before the DIO completes the data IO. | 2996 | * stale data before the DIO completes the data IO. |
2990 | * | 2997 | * |
2991 | * As for previously fallocated extents, ext4's get_block will | 2998 | * As for previously fallocated extents, ext4's get_block will |
2992 | * simply mark the buffer mapped but still keep the | 2999 | * simply mark the buffer mapped but still keep the |
2993 | * extents unwritten. | 3000 | * extents unwritten. |
2994 | * | 3001 | * |
2995 | * For the non-AIO case, we will convert those unwritten extents | 3002 | * For the non-AIO case, we will convert those unwritten extents |
2996 | * to written after returning from blockdev_direct_IO. | 3003 | * to written after returning from blockdev_direct_IO. |
2997 | * | 3004 | * |
2998 | * For async DIO, the conversion needs to be deferred until the | 3005 | * For async DIO, the conversion needs to be deferred until the |
2999 | * IO is completed. The ext4 end_io callback function will be | 3006 | * IO is completed. The ext4 end_io callback function will be |
3000 | * called to take care of the conversion work. Here, for the async | 3007 | * called to take care of the conversion work. Here, for the async |
3001 | * case, we allocate an io_end structure to hook to the iocb. | 3008 | * case, we allocate an io_end structure to hook to the iocb. |
3002 | */ | 3009 | */ |
3003 | iocb->private = NULL; | 3010 | iocb->private = NULL; |
3004 | ext4_inode_aio_set(inode, NULL); | 3011 | ext4_inode_aio_set(inode, NULL); |
3005 | if (!is_sync_kiocb(iocb)) { | 3012 | if (!is_sync_kiocb(iocb)) { |
3006 | io_end = ext4_init_io_end(inode, GFP_NOFS); | 3013 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
3007 | if (!io_end) { | 3014 | if (!io_end) { |
3008 | ret = -ENOMEM; | 3015 | ret = -ENOMEM; |
3009 | goto retake_lock; | 3016 | goto retake_lock; |
3010 | } | 3017 | } |
3011 | /* | 3018 | /* |
3012 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | 3019 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() |
3013 | */ | 3020 | */ |
3014 | iocb->private = ext4_get_io_end(io_end); | 3021 | iocb->private = ext4_get_io_end(io_end); |
3015 | /* | 3022 | /* |
3016 | * we save the io structure for the current async direct | 3023 | * we save the io structure for the current async direct |
3017 | * IO, so that ext4_map_blocks() can later flag the | 3024 | * IO, so that ext4_map_blocks() can later flag the |
3018 | * io structure to indicate whether there are unwritten extents | 3025 | * io structure to indicate whether there are unwritten extents |
3019 | * that need to be converted when the IO is completed. | 3026 | * that need to be converted when the IO is completed. |
3020 | */ | 3027 | */ |
3021 | ext4_inode_aio_set(inode, io_end); | 3028 | ext4_inode_aio_set(inode, io_end); |
3022 | } | 3029 | } |
3023 | 3030 | ||
3024 | if (overwrite) { | 3031 | if (overwrite) { |
3025 | get_block_func = ext4_get_block_write_nolock; | 3032 | get_block_func = ext4_get_block_write_nolock; |
3026 | } else { | 3033 | } else { |
3027 | get_block_func = ext4_get_block_write; | 3034 | get_block_func = ext4_get_block_write; |
3028 | dio_flags = DIO_LOCKING; | 3035 | dio_flags = DIO_LOCKING; |
3029 | } | 3036 | } |
3030 | if (IS_DAX(inode)) | 3037 | if (IS_DAX(inode)) |
3031 | ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, | 3038 | ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, |
3032 | ext4_end_io_dio, dio_flags); | 3039 | ext4_end_io_dio, dio_flags); |
3033 | else | 3040 | else |
3034 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3041 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3035 | inode->i_sb->s_bdev, iter, offset, | 3042 | inode->i_sb->s_bdev, iter, offset, |
3036 | get_block_func, | 3043 | get_block_func, |
3037 | ext4_end_io_dio, NULL, dio_flags); | 3044 | ext4_end_io_dio, NULL, dio_flags); |
3038 | 3045 | ||
3039 | /* | 3046 | /* |
3040 | * Put our reference to io_end. This can free the io_end structure e.g. | 3047 | * Put our reference to io_end. This can free the io_end structure e.g. |
3041 | * in sync IO case or in case of error. It can even perform extent | 3048 | * in sync IO case or in case of error. It can even perform extent |
3042 | * conversion if all bios we submitted finished before we got here. | 3049 | * conversion if all bios we submitted finished before we got here. |
3043 | * Note that in that case iocb->private can be already set to NULL | 3050 | * Note that in that case iocb->private can be already set to NULL |
3044 | * here. | 3051 | * here. |
3045 | */ | 3052 | */ |
3046 | if (io_end) { | 3053 | if (io_end) { |
3047 | ext4_inode_aio_set(inode, NULL); | 3054 | ext4_inode_aio_set(inode, NULL); |
3048 | ext4_put_io_end(io_end); | 3055 | ext4_put_io_end(io_end); |
3049 | /* | 3056 | /* |
3050 | * When no IO was submitted ext4_end_io_dio() was not | 3057 | * When no IO was submitted ext4_end_io_dio() was not |
3051 | * called so we have to put iocb's reference. | 3058 | * called so we have to put iocb's reference. |
3052 | */ | 3059 | */ |
3053 | if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { | 3060 | if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { |
3054 | WARN_ON(iocb->private != io_end); | 3061 | WARN_ON(iocb->private != io_end); |
3055 | WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | 3062 | WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); |
3056 | ext4_put_io_end(io_end); | 3063 | ext4_put_io_end(io_end); |
3057 | iocb->private = NULL; | 3064 | iocb->private = NULL; |
3058 | } | 3065 | } |
3059 | } | 3066 | } |
3060 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3067 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, |
3061 | EXT4_STATE_DIO_UNWRITTEN)) { | 3068 | EXT4_STATE_DIO_UNWRITTEN)) { |
3062 | int err; | 3069 | int err; |
3063 | /* | 3070 | /* |
3064 | * for non AIO case, since the IO is already | 3071 | * for non AIO case, since the IO is already |
3065 | * completed, we could do the conversion right here | 3072 | * completed, we could do the conversion right here |
3066 | */ | 3073 | */ |
3067 | err = ext4_convert_unwritten_extents(NULL, inode, | 3074 | err = ext4_convert_unwritten_extents(NULL, inode, |
3068 | offset, ret); | 3075 | offset, ret); |
3069 | if (err < 0) | 3076 | if (err < 0) |
3070 | ret = err; | 3077 | ret = err; |
3071 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3078 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3072 | } | 3079 | } |
3073 | 3080 | ||
3074 | retake_lock: | 3081 | retake_lock: |
3075 | if (rw == WRITE) | 3082 | if (rw == WRITE) |
3076 | inode_dio_done(inode); | 3083 | inode_dio_done(inode); |
3077 | /* take i_mutex again if we did an overwrite DIO */ | 3084 | /* take i_mutex again if we did an overwrite DIO */ |
3078 | if (overwrite) { | 3085 | if (overwrite) { |
3079 | up_read(&EXT4_I(inode)->i_data_sem); | 3086 | up_read(&EXT4_I(inode)->i_data_sem); |
3080 | mutex_lock(&inode->i_mutex); | 3087 | mutex_lock(&inode->i_mutex); |
3081 | } | 3088 | } |
3082 | 3089 | ||
3083 | return ret; | 3090 | return ret; |
3084 | } | 3091 | } |
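The overwrite-DIO path above performs a lock handoff: it takes i_data_sem for read before dropping i_mutex, so a truncate cannot change the block mapping while other i_mutex users proceed, and it retakes i_mutex before returning because the caller expects to still hold it. A hedged pthread sketch of that ordering; the two locks are plain userspace stand-ins for i_mutex and i_data_sem, not the kernel primitives:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t i_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_rwlock_t i_data_sem;

static void overwrite_dio(void)
{
	/* called with i_mutex held */
	pthread_rwlock_rdlock(&i_data_sem);	/* pin the block mapping */
	pthread_mutex_unlock(&i_mutex);		/* let other i_mutex users in */

	puts("submit O_DIRECT write to already-allocated blocks");

	pthread_rwlock_unlock(&i_data_sem);
	pthread_mutex_lock(&i_mutex);		/* retake before returning */
}

int main(void)
{
	pthread_rwlock_init(&i_data_sem, NULL);
	pthread_mutex_lock(&i_mutex);	/* callers enter with i_mutex held */
	overwrite_dio();
	pthread_mutex_unlock(&i_mutex);
	pthread_rwlock_destroy(&i_data_sem);
	return 0;
}

Taking the read lock before releasing the mutex is the crucial ordering; it closes the window in which a truncate could slip in and change the mapping underneath the in-flight write.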
3085 | 3092 | ||
3086 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 3093 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |
3087 | struct iov_iter *iter, loff_t offset) | 3094 | struct iov_iter *iter, loff_t offset) |
3088 | { | 3095 | { |
3089 | struct file *file = iocb->ki_filp; | 3096 | struct file *file = iocb->ki_filp; |
3090 | struct inode *inode = file->f_mapping->host; | 3097 | struct inode *inode = file->f_mapping->host; |
3091 | size_t count = iov_iter_count(iter); | 3098 | size_t count = iov_iter_count(iter); |
3092 | ssize_t ret; | 3099 | ssize_t ret; |
3093 | 3100 | ||
3094 | /* | 3101 | /* |
3095 | * If we are doing data journalling, we don't support O_DIRECT. | 3102 | * If we are doing data journalling, we don't support O_DIRECT. |
3096 | */ | 3103 | */ |
3097 | if (ext4_should_journal_data(inode)) | 3104 | if (ext4_should_journal_data(inode)) |
3098 | return 0; | 3105 | return 0; |
3099 | 3106 | ||
3100 | /* Let buffer I/O handle the inline data case. */ | 3107 | /* Let buffer I/O handle the inline data case. */ |
3101 | if (ext4_has_inline_data(inode)) | 3108 | if (ext4_has_inline_data(inode)) |
3102 | return 0; | 3109 | return 0; |
3103 | 3110 | ||
3104 | trace_ext4_direct_IO_enter(inode, offset, count, rw); | 3111 | trace_ext4_direct_IO_enter(inode, offset, count, rw); |
3105 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3112 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3106 | ret = ext4_ext_direct_IO(rw, iocb, iter, offset); | 3113 | ret = ext4_ext_direct_IO(rw, iocb, iter, offset); |
3107 | else | 3114 | else |
3108 | ret = ext4_ind_direct_IO(rw, iocb, iter, offset); | 3115 | ret = ext4_ind_direct_IO(rw, iocb, iter, offset); |
3109 | trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); | 3116 | trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); |
3110 | return ret; | 3117 | return ret; |
3111 | } | 3118 | } |
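A detail worth calling out in ext4_direct_IO(): returning 0 is how it declines O_DIRECT, which makes the VFS fall back to buffered IO for data-journalled and inline-data inodes. The dispatch can be summarized as a small decision function; a hedged sketch with illustrative flag and enum names:

#include <stdbool.h>
#include <stdio.h>

enum dio_path { DIO_BUFFERED_FALLBACK, DIO_EXTENT_PATH, DIO_INDIRECT_PATH };

static enum dio_path pick_dio_path(bool journals_data, bool has_inline_data,
				   bool extent_mapped)
{
	if (journals_data)	/* O_DIRECT unsupported with data journalling */
		return DIO_BUFFERED_FALLBACK;
	if (has_inline_data)	/* inline data is handled by buffered IO */
		return DIO_BUFFERED_FALLBACK;
	return extent_mapped ? DIO_EXTENT_PATH : DIO_INDIRECT_PATH;
}

int main(void)
{
	printf("%d\n", pick_dio_path(false, false, true));  /* extent path */
	printf("%d\n", pick_dio_path(false, false, false)); /* indirect path */
	printf("%d\n", pick_dio_path(true, false, true));   /* fallback */
	return 0;
}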
3112 | 3119 | ||
3113 | /* | 3120 | /* |
3114 | * Pages can be marked dirty completely asynchronously from ext4's journalling | 3121 | * Pages can be marked dirty completely asynchronously from ext4's journalling |
3115 | * activity, by filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do | 3122 | * activity, by filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do |
3116 | * much here because ->set_page_dirty is called under VFS locks. The page is | 3123 | * much here because ->set_page_dirty is called under VFS locks. The page is |
3117 | * not necessarily locked. | 3124 | * not necessarily locked. |
3118 | * | 3125 | * |
3119 | * We cannot just dirty the page and leave attached buffers clean, because the | 3126 | * We cannot just dirty the page and leave attached buffers clean, because the |
3120 | * buffers' dirty state is "definitive". We cannot just set the buffers dirty | 3127 | * buffers' dirty state is "definitive". We cannot just set the buffers dirty |
3121 | * or jbddirty because all the journalling code will explode. | 3128 | * or jbddirty because all the journalling code will explode. |
3122 | * | 3129 | * |
3123 | * So what we do is to mark the page "pending dirty" and next time writepage | 3130 | * So what we do is to mark the page "pending dirty" and next time writepage |
3124 | * is called, propagate that into the buffers appropriately. | 3131 | * is called, propagate that into the buffers appropriately. |
3125 | */ | 3132 | */ |
3126 | static int ext4_journalled_set_page_dirty(struct page *page) | 3133 | static int ext4_journalled_set_page_dirty(struct page *page) |
3127 | { | 3134 | { |
3128 | SetPageChecked(page); | 3135 | SetPageChecked(page); |
3129 | return __set_page_dirty_nobuffers(page); | 3136 | return __set_page_dirty_nobuffers(page); |
3130 | } | 3137 | } |
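The comment above describes a defer-under-restrictive-locking pattern: ->set_page_dirty may only set flags, so the page is tagged Checked ("buffers still need dirtying") and the expensive propagation happens later on the writepage path. A minimal flag-based sketch of the idea; the PG_* values below are illustrative, not the kernel's page flags:

#include <stdio.h>

#define PG_DIRTY   (1u << 0)
#define PG_CHECKED (1u << 1)	/* "buffers still need dirtying" marker */

static unsigned set_page_dirty(unsigned flags)
{
	/* cheap, lock-safe part: mark the page dirty and pending */
	return flags | PG_DIRTY | PG_CHECKED;
}

static unsigned writepage(unsigned flags)
{
	if (flags & PG_CHECKED) {
		/* deferred part: now safe to touch buffer/journal state */
		printf("propagating dirty state into buffers\n");
		flags &= ~PG_CHECKED;
	}
	return flags & ~PG_DIRTY;	/* page written back clean */
}

int main(void)
{
	unsigned flags = 0;

	flags = set_page_dirty(flags);
	flags = writepage(flags);
	return (int)flags;
}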
3131 | 3138 | ||
3132 | static const struct address_space_operations ext4_aops = { | 3139 | static const struct address_space_operations ext4_aops = { |
3133 | .readpage = ext4_readpage, | 3140 | .readpage = ext4_readpage, |
3134 | .readpages = ext4_readpages, | 3141 | .readpages = ext4_readpages, |
3135 | .writepage = ext4_writepage, | 3142 | .writepage = ext4_writepage, |
3136 | .writepages = ext4_writepages, | 3143 | .writepages = ext4_writepages, |
3137 | .write_begin = ext4_write_begin, | 3144 | .write_begin = ext4_write_begin, |
3138 | .write_end = ext4_write_end, | 3145 | .write_end = ext4_write_end, |
3139 | .bmap = ext4_bmap, | 3146 | .bmap = ext4_bmap, |
3140 | .invalidatepage = ext4_invalidatepage, | 3147 | .invalidatepage = ext4_invalidatepage, |
3141 | .releasepage = ext4_releasepage, | 3148 | .releasepage = ext4_releasepage, |
3142 | .direct_IO = ext4_direct_IO, | 3149 | .direct_IO = ext4_direct_IO, |
3143 | .migratepage = buffer_migrate_page, | 3150 | .migratepage = buffer_migrate_page, |
3144 | .is_partially_uptodate = block_is_partially_uptodate, | 3151 | .is_partially_uptodate = block_is_partially_uptodate, |
3145 | .error_remove_page = generic_error_remove_page, | 3152 | .error_remove_page = generic_error_remove_page, |
3146 | }; | 3153 | }; |
3147 | 3154 | ||
3148 | static const struct address_space_operations ext4_journalled_aops = { | 3155 | static const struct address_space_operations ext4_journalled_aops = { |
3149 | .readpage = ext4_readpage, | 3156 | .readpage = ext4_readpage, |
3150 | .readpages = ext4_readpages, | 3157 | .readpages = ext4_readpages, |
3151 | .writepage = ext4_writepage, | 3158 | .writepage = ext4_writepage, |
3152 | .writepages = ext4_writepages, | 3159 | .writepages = ext4_writepages, |
3153 | .write_begin = ext4_write_begin, | 3160 | .write_begin = ext4_write_begin, |
3154 | .write_end = ext4_journalled_write_end, | 3161 | .write_end = ext4_journalled_write_end, |
3155 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3162 | .set_page_dirty = ext4_journalled_set_page_dirty, |
3156 | .bmap = ext4_bmap, | 3163 | .bmap = ext4_bmap, |
3157 | .invalidatepage = ext4_journalled_invalidatepage, | 3164 | .invalidatepage = ext4_journalled_invalidatepage, |
3158 | .releasepage = ext4_releasepage, | 3165 | .releasepage = ext4_releasepage, |
3159 | .direct_IO = ext4_direct_IO, | 3166 | .direct_IO = ext4_direct_IO, |
3160 | .is_partially_uptodate = block_is_partially_uptodate, | 3167 | .is_partially_uptodate = block_is_partially_uptodate, |
3161 | .error_remove_page = generic_error_remove_page, | 3168 | .error_remove_page = generic_error_remove_page, |
3162 | }; | 3169 | }; |
3163 | 3170 | ||
3164 | static const struct address_space_operations ext4_da_aops = { | 3171 | static const struct address_space_operations ext4_da_aops = { |
3165 | .readpage = ext4_readpage, | 3172 | .readpage = ext4_readpage, |
3166 | .readpages = ext4_readpages, | 3173 | .readpages = ext4_readpages, |
3167 | .writepage = ext4_writepage, | 3174 | .writepage = ext4_writepage, |
3168 | .writepages = ext4_writepages, | 3175 | .writepages = ext4_writepages, |
3169 | .write_begin = ext4_da_write_begin, | 3176 | .write_begin = ext4_da_write_begin, |
3170 | .write_end = ext4_da_write_end, | 3177 | .write_end = ext4_da_write_end, |
3171 | .bmap = ext4_bmap, | 3178 | .bmap = ext4_bmap, |
3172 | .invalidatepage = ext4_da_invalidatepage, | 3179 | .invalidatepage = ext4_da_invalidatepage, |
3173 | .releasepage = ext4_releasepage, | 3180 | .releasepage = ext4_releasepage, |
3174 | .direct_IO = ext4_direct_IO, | 3181 | .direct_IO = ext4_direct_IO, |
3175 | .migratepage = buffer_migrate_page, | 3182 | .migratepage = buffer_migrate_page, |
3176 | .is_partially_uptodate = block_is_partially_uptodate, | 3183 | .is_partially_uptodate = block_is_partially_uptodate, |
3177 | .error_remove_page = generic_error_remove_page, | 3184 | .error_remove_page = generic_error_remove_page, |
3178 | }; | 3185 | }; |
3179 | 3186 | ||
3180 | void ext4_set_aops(struct inode *inode) | 3187 | void ext4_set_aops(struct inode *inode) |
3181 | { | 3188 | { |
3182 | switch (ext4_inode_journal_mode(inode)) { | 3189 | switch (ext4_inode_journal_mode(inode)) { |
3183 | case EXT4_INODE_ORDERED_DATA_MODE: | 3190 | case EXT4_INODE_ORDERED_DATA_MODE: |
3184 | ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE); | 3191 | ext4_set_inode_state(inode, EXT4_STATE_ORDERED_MODE); |
3185 | break; | 3192 | break; |
3186 | case EXT4_INODE_WRITEBACK_DATA_MODE: | 3193 | case EXT4_INODE_WRITEBACK_DATA_MODE: |
3187 | ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE); | 3194 | ext4_clear_inode_state(inode, EXT4_STATE_ORDERED_MODE); |
3188 | break; | 3195 | break; |
3189 | case EXT4_INODE_JOURNAL_DATA_MODE: | 3196 | case EXT4_INODE_JOURNAL_DATA_MODE: |
3190 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3197 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
3191 | return; | 3198 | return; |
3192 | default: | 3199 | default: |
3193 | BUG(); | 3200 | BUG(); |
3194 | } | 3201 | } |
3195 | if (test_opt(inode->i_sb, DELALLOC)) | 3202 | if (test_opt(inode->i_sb, DELALLOC)) |
3196 | inode->i_mapping->a_ops = &ext4_da_aops; | 3203 | inode->i_mapping->a_ops = &ext4_da_aops; |
3197 | else | 3204 | else |
3198 | inode->i_mapping->a_ops = &ext4_aops; | 3205 | inode->i_mapping->a_ops = &ext4_aops; |
3199 | } | 3206 | } |
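The selection logic above is worth spelling out: data-journalled inodes always get ext4_journalled_aops, while ordered and writeback modes only toggle an inode state flag and let the delalloc mount option choose between ext4_da_aops and ext4_aops. A tiny illustrative mapping, with strings standing in for the struct pointers:

#include <stdio.h>

enum jmode { ORDERED, WRITEBACK, JOURNALLED };

static const char *pick_aops(enum jmode mode, int delalloc)
{
	if (mode == JOURNALLED)
		return "ext4_journalled_aops";
	/* ordered vs writeback only sets/clears EXT4_STATE_ORDERED_MODE;
	 * the aops table itself is picked by the delalloc mount option */
	return delalloc ? "ext4_da_aops" : "ext4_aops";
}

int main(void)
{
	printf("%s\n", pick_aops(ORDERED, 1));    /* ext4_da_aops */
	printf("%s\n", pick_aops(WRITEBACK, 0));  /* ext4_aops */
	printf("%s\n", pick_aops(JOURNALLED, 1)); /* ext4_journalled_aops */
	return 0;
}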
3200 | 3207 | ||
3201 | static int __ext4_block_zero_page_range(handle_t *handle, | 3208 | static int __ext4_block_zero_page_range(handle_t *handle, |
3202 | struct address_space *mapping, loff_t from, loff_t length) | 3209 | struct address_space *mapping, loff_t from, loff_t length) |
3203 | { | 3210 | { |
3204 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3211 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3205 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3212 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3206 | unsigned blocksize, pos; | 3213 | unsigned blocksize, pos; |
3207 | ext4_lblk_t iblock; | 3214 | ext4_lblk_t iblock; |
3208 | struct inode *inode = mapping->host; | 3215 | struct inode *inode = mapping->host; |
3209 | struct buffer_head *bh; | 3216 | struct buffer_head *bh; |
3210 | struct page *page; | 3217 | struct page *page; |
3211 | int err = 0; | 3218 | int err = 0; |
3212 | 3219 | ||
3213 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3220 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3214 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3221 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3215 | if (!page) | 3222 | if (!page) |
3216 | return -ENOMEM; | 3223 | return -ENOMEM; |
3217 | 3224 | ||
3218 | blocksize = inode->i_sb->s_blocksize; | 3225 | blocksize = inode->i_sb->s_blocksize; |
3219 | 3226 | ||
3220 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3227 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
3221 | 3228 | ||
3222 | if (!page_has_buffers(page)) | 3229 | if (!page_has_buffers(page)) |
3223 | create_empty_buffers(page, blocksize, 0); | 3230 | create_empty_buffers(page, blocksize, 0); |
3224 | 3231 | ||
3225 | /* Find the buffer that contains "offset" */ | 3232 | /* Find the buffer that contains "offset" */ |
3226 | bh = page_buffers(page); | 3233 | bh = page_buffers(page); |
3227 | pos = blocksize; | 3234 | pos = blocksize; |
3228 | while (offset >= pos) { | 3235 | while (offset >= pos) { |
3229 | bh = bh->b_this_page; | 3236 | bh = bh->b_this_page; |
3230 | iblock++; | 3237 | iblock++; |
3231 | pos += blocksize; | 3238 | pos += blocksize; |
3232 | } | 3239 | } |
3233 | if (buffer_freed(bh)) { | 3240 | if (buffer_freed(bh)) { |
3234 | BUFFER_TRACE(bh, "freed: skip"); | 3241 | BUFFER_TRACE(bh, "freed: skip"); |
3235 | goto unlock; | 3242 | goto unlock; |
3236 | } | 3243 | } |
3237 | if (!buffer_mapped(bh)) { | 3244 | if (!buffer_mapped(bh)) { |
3238 | BUFFER_TRACE(bh, "unmapped"); | 3245 | BUFFER_TRACE(bh, "unmapped"); |
3239 | ext4_get_block(inode, iblock, bh, 0); | 3246 | ext4_get_block(inode, iblock, bh, 0); |
3240 | /* unmapped? It's a hole - nothing to do */ | 3247 | /* unmapped? It's a hole - nothing to do */ |
3241 | if (!buffer_mapped(bh)) { | 3248 | if (!buffer_mapped(bh)) { |
3242 | BUFFER_TRACE(bh, "still unmapped"); | 3249 | BUFFER_TRACE(bh, "still unmapped"); |
3243 | goto unlock; | 3250 | goto unlock; |
3244 | } | 3251 | } |
3245 | } | 3252 | } |
3246 | 3253 | ||
3247 | /* Ok, it's mapped. Make sure it's up-to-date */ | 3254 | /* Ok, it's mapped. Make sure it's up-to-date */ |
3248 | if (PageUptodate(page)) | 3255 | if (PageUptodate(page)) |
3249 | set_buffer_uptodate(bh); | 3256 | set_buffer_uptodate(bh); |
3250 | 3257 | ||
3251 | if (!buffer_uptodate(bh)) { | 3258 | if (!buffer_uptodate(bh)) { |
3252 | err = -EIO; | 3259 | err = -EIO; |
3253 | ll_rw_block(READ, 1, &bh); | 3260 | ll_rw_block(READ, 1, &bh); |
3254 | wait_on_buffer(bh); | 3261 | wait_on_buffer(bh); |
3255 | /* Uhhuh. Read error. Complain and punt. */ | 3262 | /* Uhhuh. Read error. Complain and punt. */ |
3256 | if (!buffer_uptodate(bh)) | 3263 | if (!buffer_uptodate(bh)) |
3257 | goto unlock; | 3264 | goto unlock; |
3258 | } | 3265 | } |
3259 | if (ext4_should_journal_data(inode)) { | 3266 | if (ext4_should_journal_data(inode)) { |
3260 | BUFFER_TRACE(bh, "get write access"); | 3267 | BUFFER_TRACE(bh, "get write access"); |
3261 | err = ext4_journal_get_write_access(handle, bh); | 3268 | err = ext4_journal_get_write_access(handle, bh); |
3262 | if (err) | 3269 | if (err) |
3263 | goto unlock; | 3270 | goto unlock; |
3264 | } | 3271 | } |
3265 | zero_user(page, offset, length); | 3272 | zero_user(page, offset, length); |
3266 | BUFFER_TRACE(bh, "zeroed end of block"); | 3273 | BUFFER_TRACE(bh, "zeroed end of block"); |
3267 | 3274 | ||
3268 | if (ext4_should_journal_data(inode)) { | 3275 | if (ext4_should_journal_data(inode)) { |
3269 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3276 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3270 | } else { | 3277 | } else { |
3271 | err = 0; | 3278 | err = 0; |
3272 | mark_buffer_dirty(bh); | 3279 | mark_buffer_dirty(bh); |
3273 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) | 3280 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) |
3274 | err = ext4_jbd2_file_inode(handle, inode); | 3281 | err = ext4_jbd2_file_inode(handle, inode); |
3275 | } | 3282 | } |
3276 | 3283 | ||
3277 | unlock: | 3284 | unlock: |
3278 | unlock_page(page); | 3285 | unlock_page(page); |
3279 | page_cache_release(page); | 3286 | page_cache_release(page); |
3280 | return err; | 3287 | return err; |
3281 | } | 3288 | } |
3282 | 3289 | ||
3283 | /* | 3290 | /* |
3284 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | 3291 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
3285 | * starting from file offset 'from'. The range to be zeroed must | 3292 | * starting from file offset 'from'. The range to be zeroed must |
3286 | * be contained within one block. If the specified range exceeds | 3293 | * be contained within one block. If the specified range exceeds |
3287 | * the end of the block, it will be shortened to the end of the block | 3294 | * the end of the block, it will be shortened to the end of the block |
3288 | * that corresponds to 'from'. | 3295 | * that corresponds to 'from'. |
3289 | */ | 3296 | */ |
3290 | static int ext4_block_zero_page_range(handle_t *handle, | 3297 | static int ext4_block_zero_page_range(handle_t *handle, |
3291 | struct address_space *mapping, loff_t from, loff_t length) | 3298 | struct address_space *mapping, loff_t from, loff_t length) |
3292 | { | 3299 | { |
3293 | struct inode *inode = mapping->host; | 3300 | struct inode *inode = mapping->host; |
3294 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3301 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3295 | unsigned blocksize = inode->i_sb->s_blocksize; | 3302 | unsigned blocksize = inode->i_sb->s_blocksize; |
3296 | unsigned max = blocksize - (offset & (blocksize - 1)); | 3303 | unsigned max = blocksize - (offset & (blocksize - 1)); |
3297 | 3304 | ||
3298 | /* | 3305 | /* |
3299 | * correct length if it does not fall between | 3306 | * correct length if it does not fall between |
3300 | * 'from' and the end of the block | 3307 | * 'from' and the end of the block |
3301 | */ | 3308 | */ |
3302 | if (length > max || length < 0) | 3309 | if (length > max || length < 0) |
3303 | length = max; | 3310 | length = max; |
3304 | 3311 | ||
3305 | if (IS_DAX(inode)) | 3312 | if (IS_DAX(inode)) |
3306 | return dax_zero_page_range(inode, from, length, ext4_get_block); | 3313 | return dax_zero_page_range(inode, from, length, ext4_get_block); |
3307 | return __ext4_block_zero_page_range(handle, mapping, from, length); | 3314 | return __ext4_block_zero_page_range(handle, mapping, from, length); |
3308 | } | 3315 | } |
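A worked example of the clamping above, under assumed numbers: with a 1024-byte block size and from = 5000, the offset within the block is 904, so at most 120 bytes can be zeroed before the block boundary, and any longer (or negative) length is clamped to that. The arithmetic only, as a standalone sketch:

#include <stdio.h>

int main(void)
{
	unsigned blocksize = 1024;
	unsigned long long from = 5000;
	long long length = 4096;	/* caller asked for too much */

	unsigned offset_in_block = from & (blocksize - 1);	/* 904 */
	unsigned max = blocksize - offset_in_block;		/* 120 */

	if (length > max || length < 0)
		length = max;	/* clamp to the end of 'from's block */

	printf("zero %lld bytes starting at offset %llu\n", length, from);
	return 0;
}

This prints "zero 120 bytes starting at offset 5000", i.e. the zeroing never crosses out of the block containing 'from'.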
3309 | 3316 | ||
3310 | /* | 3317 | /* |
3311 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3318 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3312 | * up to the end of the block which corresponds to `from'. | 3319 | * up to the end of the block which corresponds to `from'. |
3313 | * This is required during truncate. We need to physically zero the tail end | 3320 | * This is required during truncate. We need to physically zero the tail end |
3314 | * of that block so it doesn't yield old data if the file is later grown. | 3321 | * of that block so it doesn't yield old data if the file is later grown. |
3315 | */ | 3322 | */ |
3316 | static int ext4_block_truncate_page(handle_t *handle, | 3323 | static int ext4_block_truncate_page(handle_t *handle, |
3317 | struct address_space *mapping, loff_t from) | 3324 | struct address_space *mapping, loff_t from) |
3318 | { | 3325 | { |
3319 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3326 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3320 | unsigned length; | 3327 | unsigned length; |
3321 | unsigned blocksize; | 3328 | unsigned blocksize; |
3322 | struct inode *inode = mapping->host; | 3329 | struct inode *inode = mapping->host; |
3323 | 3330 | ||
3324 | blocksize = inode->i_sb->s_blocksize; | 3331 | blocksize = inode->i_sb->s_blocksize; |
3325 | length = blocksize - (offset & (blocksize - 1)); | 3332 | length = blocksize - (offset & (blocksize - 1)); |
3326 | 3333 | ||
3327 | return ext4_block_zero_page_range(handle, mapping, from, length); | 3334 | return ext4_block_zero_page_range(handle, mapping, from, length); |
3328 | } | 3335 | } |
3329 | 3336 | ||
3330 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 3337 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
3331 | loff_t lstart, loff_t length) | 3338 | loff_t lstart, loff_t length) |
3332 | { | 3339 | { |
3333 | struct super_block *sb = inode->i_sb; | 3340 | struct super_block *sb = inode->i_sb; |
3334 | struct address_space *mapping = inode->i_mapping; | 3341 | struct address_space *mapping = inode->i_mapping; |
3335 | unsigned partial_start, partial_end; | 3342 | unsigned partial_start, partial_end; |
3336 | ext4_fsblk_t start, end; | 3343 | ext4_fsblk_t start, end; |
3337 | loff_t byte_end = (lstart + length - 1); | 3344 | loff_t byte_end = (lstart + length - 1); |
3338 | int err = 0; | 3345 | int err = 0; |
3339 | 3346 | ||
3340 | partial_start = lstart & (sb->s_blocksize - 1); | 3347 | partial_start = lstart & (sb->s_blocksize - 1); |
3341 | partial_end = byte_end & (sb->s_blocksize - 1); | 3348 | partial_end = byte_end & (sb->s_blocksize - 1); |
3342 | 3349 | ||
3343 | start = lstart >> sb->s_blocksize_bits; | 3350 | start = lstart >> sb->s_blocksize_bits; |
3344 | end = byte_end >> sb->s_blocksize_bits; | 3351 | end = byte_end >> sb->s_blocksize_bits; |
3345 | 3352 | ||
3346 | /* Handle partial zero within the single block */ | 3353 | /* Handle partial zero within the single block */ |
3347 | if (start == end && | 3354 | if (start == end && |
3348 | (partial_start || (partial_end != sb->s_blocksize - 1))) { | 3355 | (partial_start || (partial_end != sb->s_blocksize - 1))) { |
3349 | err = ext4_block_zero_page_range(handle, mapping, | 3356 | err = ext4_block_zero_page_range(handle, mapping, |
3350 | lstart, length); | 3357 | lstart, length); |
3351 | return err; | 3358 | return err; |
3352 | } | 3359 | } |
3353 | /* Handle partial zero out on the start of the range */ | 3360 | /* Handle partial zero out on the start of the range */ |
3354 | if (partial_start) { | 3361 | if (partial_start) { |
3355 | err = ext4_block_zero_page_range(handle, mapping, | 3362 | err = ext4_block_zero_page_range(handle, mapping, |
3356 | lstart, sb->s_blocksize); | 3363 | lstart, sb->s_blocksize); |
3357 | if (err) | 3364 | if (err) |
3358 | return err; | 3365 | return err; |
3359 | } | 3366 | } |
3360 | /* Handle partial zero out on the end of the range */ | 3367 | /* Handle partial zero out on the end of the range */ |
3361 | if (partial_end != sb->s_blocksize - 1) | 3368 | if (partial_end != sb->s_blocksize - 1) |
3362 | err = ext4_block_zero_page_range(handle, mapping, | 3369 | err = ext4_block_zero_page_range(handle, mapping, |
3363 | byte_end - partial_end, | 3370 | byte_end - partial_end, |
3364 | partial_end + 1); | 3371 | partial_end + 1); |
3365 | return err; | 3372 | return err; |
3366 | } | 3373 | } |
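The case analysis in ext4_zero_partial_blocks() splits the byte range into an unaligned head, whole blocks (left for the caller to free), and an unaligned tail. A standalone sketch with assumed values (4096-byte blocks, lstart = 1000, length = 10000), where the printed head range reflects the clamping done by ext4_block_zero_page_range():

#include <stdio.h>

int main(void)
{
	unsigned long long blocksize = 4096, bits = 12;
	unsigned long long lstart = 1000, length = 10000;
	unsigned long long byte_end = lstart + length - 1;	/* 10999 */

	unsigned partial_start = lstart & (blocksize - 1);	/* 1000 */
	unsigned partial_end = byte_end & (blocksize - 1);	/* 2807 */
	unsigned long long start = lstart >> bits;		/* block 0 */
	unsigned long long end = byte_end >> bits;		/* block 2 */

	if (start == end) {
		printf("single-block partial zero\n");
	} else {
		if (partial_start)
			printf("zero head: %llu..%llu\n",
			       lstart, (start + 1) * blocksize - 1);
		if (partial_end != blocksize - 1)
			printf("zero tail: %llu..%llu\n",
			       byte_end - partial_end, byte_end);
	}
	return 0;
}

Here the head 1000..4095 and the tail 8192..10999 are zeroed, and only block 1 (bytes 4096..8191) is left for the block-removal code that follows in the caller.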
3367 | 3374 | ||
3368 | int ext4_can_truncate(struct inode *inode) | 3375 | int ext4_can_truncate(struct inode *inode) |
3369 | { | 3376 | { |
3370 | if (S_ISREG(inode->i_mode)) | 3377 | if (S_ISREG(inode->i_mode)) |
3371 | return 1; | 3378 | return 1; |
3372 | if (S_ISDIR(inode->i_mode)) | 3379 | if (S_ISDIR(inode->i_mode)) |
3373 | return 1; | 3380 | return 1; |
3374 | if (S_ISLNK(inode->i_mode)) | 3381 | if (S_ISLNK(inode->i_mode)) |
3375 | return !ext4_inode_is_fast_symlink(inode); | 3382 | return !ext4_inode_is_fast_symlink(inode); |
3376 | return 0; | 3383 | return 0; |
3377 | } | 3384 | } |
3378 | 3385 | ||
3379 | /* | 3386 | /* |
3380 | * ext4_punch_hole: punches a hole in a file by releasing the blocks | 3387 | * ext4_punch_hole: punches a hole in a file by releasing the blocks |
3381 | * associated with the given offset and length | 3388 | * associated with the given offset and length |
3382 | * | 3389 | * |
3383 | * @inode: File inode | 3390 | * @inode: File inode |
3384 | * @offset: The offset where the hole will begin | 3391 | * @offset: The offset where the hole will begin |
3385 | * @len: The length of the hole | 3392 | * @len: The length of the hole |
3386 | * | 3393 | * |
3387 | * Returns: 0 on success or negative on failure | 3394 | * Returns: 0 on success or negative on failure |
3388 | */ | 3395 | */ |
3389 | 3396 | ||
3390 | int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | 3397 | int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) |
3391 | { | 3398 | { |
3392 | struct super_block *sb = inode->i_sb; | 3399 | struct super_block *sb = inode->i_sb; |
3393 | ext4_lblk_t first_block, stop_block; | 3400 | ext4_lblk_t first_block, stop_block; |
3394 | struct address_space *mapping = inode->i_mapping; | 3401 | struct address_space *mapping = inode->i_mapping; |
3395 | loff_t first_block_offset, last_block_offset; | 3402 | loff_t first_block_offset, last_block_offset; |
3396 | handle_t *handle; | 3403 | handle_t *handle; |
3397 | unsigned int credits; | 3404 | unsigned int credits; |
3398 | int ret = 0; | 3405 | int ret = 0; |
3399 | 3406 | ||
3400 | if (!S_ISREG(inode->i_mode)) | 3407 | if (!S_ISREG(inode->i_mode)) |
3401 | return -EOPNOTSUPP; | 3408 | return -EOPNOTSUPP; |
3402 | 3409 | ||
3403 | trace_ext4_punch_hole(inode, offset, length, 0); | 3410 | trace_ext4_punch_hole(inode, offset, length, 0); |
3404 | 3411 | ||
3405 | /* | 3412 | /* |
3406 | * Write out all dirty pages to avoid race conditions, | 3413 | * Write out all dirty pages to avoid race conditions, |
3407 | * then release them. | 3414 | * then release them. |
3408 | */ | 3415 | */ |
3409 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 3416 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
3410 | ret = filemap_write_and_wait_range(mapping, offset, | 3417 | ret = filemap_write_and_wait_range(mapping, offset, |
3411 | offset + length - 1); | 3418 | offset + length - 1); |
3412 | if (ret) | 3419 | if (ret) |
3413 | return ret; | 3420 | return ret; |
3414 | } | 3421 | } |
3415 | 3422 | ||
3416 | mutex_lock(&inode->i_mutex); | 3423 | mutex_lock(&inode->i_mutex); |
3417 | 3424 | ||
3418 | /* No need to punch hole beyond i_size */ | 3425 | /* No need to punch hole beyond i_size */ |
3419 | if (offset >= inode->i_size) | 3426 | if (offset >= inode->i_size) |
3420 | goto out_mutex; | 3427 | goto out_mutex; |
3421 | 3428 | ||
3422 | /* | 3429 | /* |
3423 | * If the hole extends beyond i_size, set the hole | 3430 | * If the hole extends beyond i_size, set the hole |
3424 | * to end after the page that contains i_size | 3431 | * to end after the page that contains i_size |
3425 | */ | 3432 | */ |
3426 | if (offset + length > inode->i_size) { | 3433 | if (offset + length > inode->i_size) { |
3427 | length = inode->i_size + | 3434 | length = inode->i_size + |
3428 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - | 3435 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - |
3429 | offset; | 3436 | offset; |
3430 | } | 3437 | } |
3431 | 3438 | ||
3432 | if (offset & (sb->s_blocksize - 1) || | 3439 | if (offset & (sb->s_blocksize - 1) || |
3433 | (offset + length) & (sb->s_blocksize - 1)) { | 3440 | (offset + length) & (sb->s_blocksize - 1)) { |
3434 | /* | 3441 | /* |
3435 | * Attach jinode to inode for jbd2 if we do any zeroing of | 3442 | * Attach jinode to inode for jbd2 if we do any zeroing of |
3436 | * a partial block | 3443 | * a partial block |
3437 | */ | 3444 | */ |
3438 | ret = ext4_inode_attach_jinode(inode); | 3445 | ret = ext4_inode_attach_jinode(inode); |
3439 | if (ret < 0) | 3446 | if (ret < 0) |
3440 | goto out_mutex; | 3447 | goto out_mutex; |
3441 | 3448 | ||
3442 | } | 3449 | } |
3443 | 3450 | ||
3444 | first_block_offset = round_up(offset, sb->s_blocksize); | 3451 | first_block_offset = round_up(offset, sb->s_blocksize); |
3445 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; | 3452 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
3446 | 3453 | ||
3447 | /* Now release the pages and zero the block-aligned part of the pages */ | 3454 | /* Now release the pages and zero the block-aligned part of the pages */ |
3448 | if (last_block_offset > first_block_offset) | 3455 | if (last_block_offset > first_block_offset) |
3449 | truncate_pagecache_range(inode, first_block_offset, | 3456 | truncate_pagecache_range(inode, first_block_offset, |
3450 | last_block_offset); | 3457 | last_block_offset); |
3451 | 3458 | ||
3452 | /* Wait for all existing DIO workers; newcomers will block on i_mutex */ | 3459 | /* Wait for all existing DIO workers; newcomers will block on i_mutex */ |
3453 | ext4_inode_block_unlocked_dio(inode); | 3460 | ext4_inode_block_unlocked_dio(inode); |
3454 | inode_dio_wait(inode); | 3461 | inode_dio_wait(inode); |
3455 | 3462 | ||
3456 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3463 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3457 | credits = ext4_writepage_trans_blocks(inode); | 3464 | credits = ext4_writepage_trans_blocks(inode); |
3458 | else | 3465 | else |
3459 | credits = ext4_blocks_for_truncate(inode); | 3466 | credits = ext4_blocks_for_truncate(inode); |
3460 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 3467 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
3461 | if (IS_ERR(handle)) { | 3468 | if (IS_ERR(handle)) { |
3462 | ret = PTR_ERR(handle); | 3469 | ret = PTR_ERR(handle); |
3463 | ext4_std_error(sb, ret); | 3470 | ext4_std_error(sb, ret); |
3464 | goto out_dio; | 3471 | goto out_dio; |
3465 | } | 3472 | } |
3466 | 3473 | ||
3467 | ret = ext4_zero_partial_blocks(handle, inode, offset, | 3474 | ret = ext4_zero_partial_blocks(handle, inode, offset, |
3468 | length); | 3475 | length); |
3469 | if (ret) | 3476 | if (ret) |
3470 | goto out_stop; | 3477 | goto out_stop; |
3471 | 3478 | ||
3472 | first_block = (offset + sb->s_blocksize - 1) >> | 3479 | first_block = (offset + sb->s_blocksize - 1) >> |
3473 | EXT4_BLOCK_SIZE_BITS(sb); | 3480 | EXT4_BLOCK_SIZE_BITS(sb); |
3474 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | 3481 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); |
3475 | 3482 | ||
3476 | /* If there are no blocks to remove, return now */ | 3483 | /* If there are no blocks to remove, return now */ |
3477 | if (first_block >= stop_block) | 3484 | if (first_block >= stop_block) |
3478 | goto out_stop; | 3485 | goto out_stop; |
3479 | 3486 | ||
3480 | down_write(&EXT4_I(inode)->i_data_sem); | 3487 | down_write(&EXT4_I(inode)->i_data_sem); |
3481 | ext4_discard_preallocations(inode); | 3488 | ext4_discard_preallocations(inode); |
3482 | 3489 | ||
3483 | ret = ext4_es_remove_extent(inode, first_block, | 3490 | ret = ext4_es_remove_extent(inode, first_block, |
3484 | stop_block - first_block); | 3491 | stop_block - first_block); |
3485 | if (ret) { | 3492 | if (ret) { |
3486 | up_write(&EXT4_I(inode)->i_data_sem); | 3493 | up_write(&EXT4_I(inode)->i_data_sem); |
3487 | goto out_stop; | 3494 | goto out_stop; |
3488 | } | 3495 | } |
3489 | 3496 | ||
3490 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3497 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3491 | ret = ext4_ext_remove_space(inode, first_block, | 3498 | ret = ext4_ext_remove_space(inode, first_block, |
3492 | stop_block - 1); | 3499 | stop_block - 1); |
3493 | else | 3500 | else |
3494 | ret = ext4_ind_remove_space(handle, inode, first_block, | 3501 | ret = ext4_ind_remove_space(handle, inode, first_block, |
3495 | stop_block); | 3502 | stop_block); |
3496 | 3503 | ||
3497 | up_write(&EXT4_I(inode)->i_data_sem); | 3504 | up_write(&EXT4_I(inode)->i_data_sem); |
3498 | if (IS_SYNC(inode)) | 3505 | if (IS_SYNC(inode)) |
3499 | ext4_handle_sync(handle); | 3506 | ext4_handle_sync(handle); |
3500 | 3507 | ||
3501 | /* Now release the pages again to reduce the race window */ | 3508 | /* Now release the pages again to reduce the race window */ |
3502 | if (last_block_offset > first_block_offset) | 3509 | if (last_block_offset > first_block_offset) |
3503 | truncate_pagecache_range(inode, first_block_offset, | 3510 | truncate_pagecache_range(inode, first_block_offset, |
3504 | last_block_offset); | 3511 | last_block_offset); |
3505 | 3512 | ||
3506 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3513 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3507 | ext4_mark_inode_dirty(handle, inode); | 3514 | ext4_mark_inode_dirty(handle, inode); |
3508 | out_stop: | 3515 | out_stop: |
3509 | ext4_journal_stop(handle); | 3516 | ext4_journal_stop(handle); |
3510 | out_dio: | 3517 | out_dio: |
3511 | ext4_inode_resume_unlocked_dio(inode); | 3518 | ext4_inode_resume_unlocked_dio(inode); |
3512 | out_mutex: | 3519 | out_mutex: |
3513 | mutex_unlock(&inode->i_mutex); | 3520 | mutex_unlock(&inode->i_mutex); |
3514 | return ret; | 3521 | return ret; |
3515 | } | 3522 | } |
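The hole geometry in ext4_punch_hole() is derived by rounding the requested byte range inward to block boundaries: only whole blocks inside [first_block, stop_block) are actually freed, while the unaligned edges are merely zeroed. A worked standalone example with an assumed 4096-byte block size:

#include <stdio.h>

#define BLOCKSIZE 4096ULL

static unsigned long long round_up_bs(unsigned long long x)
{
	return (x + BLOCKSIZE - 1) & ~(BLOCKSIZE - 1);
}

static unsigned long long round_down_bs(unsigned long long x)
{
	return x & ~(BLOCKSIZE - 1);
}

int main(void)
{
	unsigned long long offset = 1000, length = 10000;

	unsigned long long first_block_offset = round_up_bs(offset);		   /* 4096 */
	unsigned long long last_block_offset = round_down_bs(offset + length) - 1; /* 8191 */
	unsigned long long first_block = (offset + BLOCKSIZE - 1) / BLOCKSIZE;	   /* 1 */
	unsigned long long stop_block = (offset + length) / BLOCKSIZE;		   /* 2 */

	printf("truncate page cache: [%llu, %llu]\n",
	       first_block_offset, last_block_offset);
	printf("remove blocks: [%llu, %llu)\n", first_block, stop_block);
	return 0;
}

With these inputs only block 1 is freed; bytes 1000..4095 and 8192..10999 are handled by ext4_zero_partial_blocks() above, and if first_block >= stop_block there is nothing to remove at all.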
3516 | 3523 | ||
3517 | int ext4_inode_attach_jinode(struct inode *inode) | 3524 | int ext4_inode_attach_jinode(struct inode *inode) |
3518 | { | 3525 | { |
3519 | struct ext4_inode_info *ei = EXT4_I(inode); | 3526 | struct ext4_inode_info *ei = EXT4_I(inode); |
3520 | struct jbd2_inode *jinode; | 3527 | struct jbd2_inode *jinode; |
3521 | 3528 | ||
3522 | if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal) | 3529 | if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal) |
3523 | return 0; | 3530 | return 0; |
3524 | 3531 | ||
3525 | jinode = jbd2_alloc_inode(GFP_KERNEL); | 3532 | jinode = jbd2_alloc_inode(GFP_KERNEL); |
3526 | spin_lock(&inode->i_lock); | 3533 | spin_lock(&inode->i_lock); |
3527 | if (!ei->jinode) { | 3534 | if (!ei->jinode) { |
3528 | if (!jinode) { | 3535 | if (!jinode) { |
3529 | spin_unlock(&inode->i_lock); | 3536 | spin_unlock(&inode->i_lock); |
3530 | return -ENOMEM; | 3537 | return -ENOMEM; |
3531 | } | 3538 | } |
3532 | ei->jinode = jinode; | 3539 | ei->jinode = jinode; |
3533 | jbd2_journal_init_jbd_inode(ei->jinode, inode); | 3540 | jbd2_journal_init_jbd_inode(ei->jinode, inode); |
3534 | jinode = NULL; | 3541 | jinode = NULL; |
3535 | } | 3542 | } |
3536 | spin_unlock(&inode->i_lock); | 3543 | spin_unlock(&inode->i_lock); |
3537 | if (unlikely(jinode != NULL)) | 3544 | if (unlikely(jinode != NULL)) |
3538 | jbd2_free_inode(jinode); | 3545 | jbd2_free_inode(jinode); |
3539 | return 0; | 3546 | return 0; |
3540 | } | 3547 | } |
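ext4_inode_attach_jinode() uses the classic optimistic-init pattern: the allocation (which may sleep) happens before taking the spinlock, the pointer is installed under the lock only if it is still unset, and the allocation is freed if another thread won the race. A userspace sketch of the same shape, with a pthread mutex standing in for i_lock and illustrative names throughout:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct state {
	pthread_mutex_t lock;
	void *jinode;	/* lazily attached object */
};

static int attach_jinode(struct state *s)
{
	void *fresh;

	if (s->jinode)		/* unlocked fast path, as in the kernel */
		return 0;

	fresh = malloc(64);	/* "may sleep", so done before locking */

	pthread_mutex_lock(&s->lock);
	if (!s->jinode) {
		if (!fresh) {
			pthread_mutex_unlock(&s->lock);
			return -1;	/* -ENOMEM in the kernel */
		}
		s->jinode = fresh;	/* we won the race: install it */
		fresh = NULL;
	}
	pthread_mutex_unlock(&s->lock);

	free(fresh);	/* NULL if installed; frees a racing loser's copy */
	return 0;
}

int main(void)
{
	struct state s = { PTHREAD_MUTEX_INITIALIZER, NULL };

	attach_jinode(&s);
	attach_jinode(&s);	/* second call finds it already attached */
	printf("attached: %p\n", s.jinode);
	free(s.jinode);
	return 0;
}

Allocating before the lock keeps a sleeping allocation out of the spinlock's critical section, at the cost of one wasted allocation when two callers race.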
3541 | 3548 | ||
3542 | /* | 3549 | /* |
3543 | * ext4_truncate() | 3550 | * ext4_truncate() |
3544 | * | 3551 | * |
3545 | * We block out ext4_get_block() block instantiations across the entire | 3552 | * We block out ext4_get_block() block instantiations across the entire |
3546 | * transaction, and VFS/VM ensures that ext4_truncate() cannot run | 3553 | * transaction, and VFS/VM ensures that ext4_truncate() cannot run |
3547 | * simultaneously on behalf of the same inode. | 3554 | * simultaneously on behalf of the same inode. |
3548 | * | 3555 | * |
3549 | * As we work through the truncate and commit bits of it to the journal there | 3556 | * As we work through the truncate and commit bits of it to the journal there |
3550 | * is one core, guiding principle: the file's tree must always be consistent on | 3557 | * is one core, guiding principle: the file's tree must always be consistent on |
3551 | * disk. We must be able to restart the truncate after a crash. | 3558 | * disk. We must be able to restart the truncate after a crash. |
3552 | * | 3559 | * |
3553 | * The file's tree may be transiently inconsistent in memory (although it | 3560 | * The file's tree may be transiently inconsistent in memory (although it |
3554 | * probably isn't), but whenever we close off and commit a journal transaction, | 3561 | * probably isn't), but whenever we close off and commit a journal transaction, |
3555 | * the contents of (the filesystem + the journal) must be consistent and | 3562 | * the contents of (the filesystem + the journal) must be consistent and |
3556 | * restartable. It's pretty simple, really: bottom up, right to left (although | 3563 | * restartable. It's pretty simple, really: bottom up, right to left (although |
3557 | * left-to-right works OK too). | 3564 | * left-to-right works OK too). |
3558 | * | 3565 | * |
3559 | * Note that at recovery time, journal replay occurs *before* the restart of | 3566 | * Note that at recovery time, journal replay occurs *before* the restart of |
3560 | * truncate against the orphan inode list. | 3567 | * truncate against the orphan inode list. |
3561 | * | 3568 | * |
3562 | * The committed inode has the new, desired i_size (which is the same as | 3569 | * The committed inode has the new, desired i_size (which is the same as |
3563 | * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see | 3570 | * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see |
3564 | * that this inode's truncate did not complete and it will again call | 3571 | * that this inode's truncate did not complete and it will again call |
3565 | * ext4_truncate() to have another go. So there will be instantiated blocks | 3572 | * ext4_truncate() to have another go. So there will be instantiated blocks |
3566 | * to the right of the truncation point in a crashed ext4 filesystem. But | 3573 | * to the right of the truncation point in a crashed ext4 filesystem. But |
3567 | * that's fine - as long as they are linked from the inode, the post-crash | 3574 | * that's fine - as long as they are linked from the inode, the post-crash |
3568 | * ext4_truncate() run will find them and release them. | 3575 | * ext4_truncate() run will find them and release them. |
3569 | */ | 3576 | */ |
3570 | void ext4_truncate(struct inode *inode) | 3577 | void ext4_truncate(struct inode *inode) |
3571 | { | 3578 | { |
3572 | struct ext4_inode_info *ei = EXT4_I(inode); | 3579 | struct ext4_inode_info *ei = EXT4_I(inode); |
3573 | unsigned int credits; | 3580 | unsigned int credits; |
3574 | handle_t *handle; | 3581 | handle_t *handle; |
3575 | struct address_space *mapping = inode->i_mapping; | 3582 | struct address_space *mapping = inode->i_mapping; |
3576 | 3583 | ||
3577 | /* | 3584 | /* |
3578 | * There is a possibility that we're either freeing the inode | 3585 | * There is a possibility that we're either freeing the inode |
3579 | * or it's a completely new inode. In those cases we might not | 3586 | * or it's a completely new inode. In those cases we might not |
3580 | * have i_mutex locked because it's not necessary. | 3587 | * have i_mutex locked because it's not necessary. |
3581 | */ | 3588 | */ |
3582 | if (!(inode->i_state & (I_NEW|I_FREEING))) | 3589 | if (!(inode->i_state & (I_NEW|I_FREEING))) |
3583 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); | 3590 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); |
3584 | trace_ext4_truncate_enter(inode); | 3591 | trace_ext4_truncate_enter(inode); |
3585 | 3592 | ||
3586 | if (!ext4_can_truncate(inode)) | 3593 | if (!ext4_can_truncate(inode)) |
3587 | return; | 3594 | return; |
3588 | 3595 | ||
3589 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 3596 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
3590 | 3597 | ||
3591 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 3598 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
3592 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 3599 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
3593 | 3600 | ||
3594 | if (ext4_has_inline_data(inode)) { | 3601 | if (ext4_has_inline_data(inode)) { |
3595 | int has_inline = 1; | 3602 | int has_inline = 1; |
3596 | 3603 | ||
3597 | ext4_inline_data_truncate(inode, &has_inline); | 3604 | ext4_inline_data_truncate(inode, &has_inline); |
3598 | if (has_inline) | 3605 | if (has_inline) |
3599 | return; | 3606 | return; |
3600 | } | 3607 | } |
3601 | 3608 | ||
3602 | /* If we zero out the tail of the page, we have to create a jinode for jbd2 */ | 3609 | /* If we zero out the tail of the page, we have to create a jinode for jbd2 */ |
3603 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { | 3610 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { |
3604 | if (ext4_inode_attach_jinode(inode) < 0) | 3611 | if (ext4_inode_attach_jinode(inode) < 0) |
3605 | return; | 3612 | return; |
3606 | } | 3613 | } |
3607 | 3614 | ||
3608 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3615 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3609 | credits = ext4_writepage_trans_blocks(inode); | 3616 | credits = ext4_writepage_trans_blocks(inode); |
3610 | else | 3617 | else |
3611 | credits = ext4_blocks_for_truncate(inode); | 3618 | credits = ext4_blocks_for_truncate(inode); |
3612 | 3619 | ||
3613 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | 3620 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); |
3614 | if (IS_ERR(handle)) { | 3621 | if (IS_ERR(handle)) { |
3615 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); | 3622 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); |
3616 | return; | 3623 | return; |
3617 | } | 3624 | } |
3618 | 3625 | ||
3619 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) | 3626 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) |
3620 | ext4_block_truncate_page(handle, mapping, inode->i_size); | 3627 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
3621 | 3628 | ||
3622 | /* | 3629 | /* |
3623 | * We add the inode to the orphan list, so that if this | 3630 | * We add the inode to the orphan list, so that if this |
3624 | * truncate spans multiple transactions, and we crash, we will | 3631 | * truncate spans multiple transactions, and we crash, we will |
3625 | * resume the truncate when the filesystem recovers. It also | 3632 | * resume the truncate when the filesystem recovers. It also |
3626 | * marks the inode dirty, to catch the new size. | 3633 | * marks the inode dirty, to catch the new size. |
3627 | * | 3634 | * |
3628 | * Implication: the file must always be in a sane, consistent | 3635 | * Implication: the file must always be in a sane, consistent |
3629 | * truncatable state while each transaction commits. | 3636 | * truncatable state while each transaction commits. |
3630 | */ | 3637 | */ |
3631 | if (ext4_orphan_add(handle, inode)) | 3638 | if (ext4_orphan_add(handle, inode)) |
3632 | goto out_stop; | 3639 | goto out_stop; |
3633 | 3640 | ||
3634 | down_write(&EXT4_I(inode)->i_data_sem); | 3641 | down_write(&EXT4_I(inode)->i_data_sem); |
3635 | 3642 | ||
3636 | ext4_discard_preallocations(inode); | 3643 | ext4_discard_preallocations(inode); |
3637 | 3644 | ||
3638 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3645 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3639 | ext4_ext_truncate(handle, inode); | 3646 | ext4_ext_truncate(handle, inode); |
3640 | else | 3647 | else |
3641 | ext4_ind_truncate(handle, inode); | 3648 | ext4_ind_truncate(handle, inode); |
3642 | 3649 | ||
3643 | up_write(&ei->i_data_sem); | 3650 | up_write(&ei->i_data_sem); |
3644 | 3651 | ||
3645 | if (IS_SYNC(inode)) | 3652 | if (IS_SYNC(inode)) |
3646 | ext4_handle_sync(handle); | 3653 | ext4_handle_sync(handle); |
3647 | 3654 | ||
3648 | out_stop: | 3655 | out_stop: |
3649 | /* | 3656 | /* |
3650 | * If this was a simple ftruncate() and the file will remain alive, | 3657 | * If this was a simple ftruncate() and the file will remain alive, |
3651 | * then we need to clear up the orphan record which we created above. | 3658 | * then we need to clear up the orphan record which we created above. |
3652 | * However, if this was a real unlink then we were called by | 3659 | * However, if this was a real unlink then we were called by |
3653 | * ext4_evict_inode(), and we allow that function to clean up the | 3660 | * ext4_evict_inode(), and we allow that function to clean up the |
3654 | * orphan info for us. | 3661 | * orphan info for us. |
3655 | */ | 3662 | */ |
3656 | if (inode->i_nlink) | 3663 | if (inode->i_nlink) |
3657 | ext4_orphan_del(handle, inode); | 3664 | ext4_orphan_del(handle, inode); |
3658 | 3665 | ||
3659 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3666 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3660 | ext4_mark_inode_dirty(handle, inode); | 3667 | ext4_mark_inode_dirty(handle, inode); |
3661 | ext4_journal_stop(handle); | 3668 | ext4_journal_stop(handle); |
3662 | 3669 | ||
3663 | trace_ext4_truncate_exit(inode); | 3670 | trace_ext4_truncate_exit(inode); |
3664 | } | 3671 | } |
3665 | 3672 | ||
3666 | /* | 3673 | /* |
3667 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3674 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
3668 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3675 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
3669 | * data in memory that is needed to recreate the on-disk version of this | 3676 | * data in memory that is needed to recreate the on-disk version of this |
3670 | * inode. | 3677 | * inode. |
3671 | */ | 3678 | */ |
3672 | static int __ext4_get_inode_loc(struct inode *inode, | 3679 | static int __ext4_get_inode_loc(struct inode *inode, |
3673 | struct ext4_iloc *iloc, int in_mem) | 3680 | struct ext4_iloc *iloc, int in_mem) |
3674 | { | 3681 | { |
3675 | struct ext4_group_desc *gdp; | 3682 | struct ext4_group_desc *gdp; |
3676 | struct buffer_head *bh; | 3683 | struct buffer_head *bh; |
3677 | struct super_block *sb = inode->i_sb; | 3684 | struct super_block *sb = inode->i_sb; |
3678 | ext4_fsblk_t block; | 3685 | ext4_fsblk_t block; |
3679 | int inodes_per_block, inode_offset; | 3686 | int inodes_per_block, inode_offset; |
3680 | 3687 | ||
3681 | iloc->bh = NULL; | 3688 | iloc->bh = NULL; |
3682 | if (!ext4_valid_inum(sb, inode->i_ino)) | 3689 | if (!ext4_valid_inum(sb, inode->i_ino)) |
3683 | return -EIO; | 3690 | return -EIO; |
3684 | 3691 | ||
3685 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); | 3692 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
3686 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); | 3693 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
3687 | if (!gdp) | 3694 | if (!gdp) |
3688 | return -EIO; | 3695 | return -EIO; |
3689 | 3696 | ||
3690 | /* | 3697 | /* |
3691 | * Figure out the offset within the block group inode table | 3698 | * Figure out the offset within the block group inode table |
3692 | */ | 3699 | */ |
3693 | inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; | 3700 | inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; |
3694 | inode_offset = ((inode->i_ino - 1) % | 3701 | inode_offset = ((inode->i_ino - 1) % |
3695 | EXT4_INODES_PER_GROUP(sb)); | 3702 | EXT4_INODES_PER_GROUP(sb)); |
3696 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | 3703 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); |
3697 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | 3704 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); |
3698 | 3705 | ||
3699 | bh = sb_getblk(sb, block); | 3706 | bh = sb_getblk(sb, block); |
3700 | if (unlikely(!bh)) | 3707 | if (unlikely(!bh)) |
3701 | return -ENOMEM; | 3708 | return -ENOMEM; |
3702 | if (!buffer_uptodate(bh)) { | 3709 | if (!buffer_uptodate(bh)) { |
3703 | lock_buffer(bh); | 3710 | lock_buffer(bh); |
3704 | 3711 | ||
3705 | /* | 3712 | /* |
3706 | * If the buffer has the write error flag, we have failed | 3713 | * If the buffer has the write error flag, we have failed |
3707 | * to write out another inode in the same block. In this | 3714 | * to write out another inode in the same block. In this |
3708 | * case, we don't have to read the block: re-reading | 3715 | * case, we don't have to read the block: re-reading |
3709 | * it would only return the old inode data anyway. | 3716 | * it would only return the old inode data anyway. |
3710 | */ | 3717 | */ |
3711 | if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) | 3718 | if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) |
3712 | set_buffer_uptodate(bh); | 3719 | set_buffer_uptodate(bh); |
3713 | 3720 | ||
3714 | if (buffer_uptodate(bh)) { | 3721 | if (buffer_uptodate(bh)) { |
3715 | /* someone brought it uptodate while we waited */ | 3722 | /* someone brought it uptodate while we waited */ |
3716 | unlock_buffer(bh); | 3723 | unlock_buffer(bh); |
3717 | goto has_buffer; | 3724 | goto has_buffer; |
3718 | } | 3725 | } |
3719 | 3726 | ||
3720 | /* | 3727 | /* |
3721 | * If we have all information of the inode in memory and this | 3728 | * If we have all information of the inode in memory and this |
3722 | * is the only valid inode in the block, we need not read the | 3729 | * is the only valid inode in the block, we need not read the |
3723 | * block. | 3730 | * block. |
3724 | */ | 3731 | */ |
3725 | if (in_mem) { | 3732 | if (in_mem) { |
3726 | struct buffer_head *bitmap_bh; | 3733 | struct buffer_head *bitmap_bh; |
3727 | int i, start; | 3734 | int i, start; |
3728 | 3735 | ||
3729 | start = inode_offset & ~(inodes_per_block - 1); | 3736 | start = inode_offset & ~(inodes_per_block - 1); |
3730 | 3737 | ||
3731 | /* Is the inode bitmap in cache? */ | 3738 | /* Is the inode bitmap in cache? */ |
3732 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); | 3739 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
3733 | if (unlikely(!bitmap_bh)) | 3740 | if (unlikely(!bitmap_bh)) |
3734 | goto make_io; | 3741 | goto make_io; |
3735 | 3742 | ||
3736 | /* | 3743 | /* |
3737 | * If the inode bitmap isn't in cache then the | 3744 | * If the inode bitmap isn't in cache then the |
3738 | * optimisation may end up performing two reads instead | 3745 | * optimisation may end up performing two reads instead |
3739 | * of one, so skip it. | 3746 | * of one, so skip it. |
3740 | */ | 3747 | */ |
3741 | if (!buffer_uptodate(bitmap_bh)) { | 3748 | if (!buffer_uptodate(bitmap_bh)) { |
3742 | brelse(bitmap_bh); | 3749 | brelse(bitmap_bh); |
3743 | goto make_io; | 3750 | goto make_io; |
3744 | } | 3751 | } |
3745 | for (i = start; i < start + inodes_per_block; i++) { | 3752 | for (i = start; i < start + inodes_per_block; i++) { |
3746 | if (i == inode_offset) | 3753 | if (i == inode_offset) |
3747 | continue; | 3754 | continue; |
3748 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3755 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
3749 | break; | 3756 | break; |
3750 | } | 3757 | } |
3751 | brelse(bitmap_bh); | 3758 | brelse(bitmap_bh); |
3752 | if (i == start + inodes_per_block) { | 3759 | if (i == start + inodes_per_block) { |
3753 | /* all other inodes are free, so skip I/O */ | 3760 | /* all other inodes are free, so skip I/O */ |
3754 | memset(bh->b_data, 0, bh->b_size); | 3761 | memset(bh->b_data, 0, bh->b_size); |
3755 | set_buffer_uptodate(bh); | 3762 | set_buffer_uptodate(bh); |
3756 | unlock_buffer(bh); | 3763 | unlock_buffer(bh); |
3757 | goto has_buffer; | 3764 | goto has_buffer; |
3758 | } | 3765 | } |
3759 | } | 3766 | } |
3760 | 3767 | ||
3761 | make_io: | 3768 | make_io: |
3762 | /* | 3769 | /* |
3763 | * If we need to do any I/O, try to read ahead extra | 3770 | * If we need to do any I/O, try to read ahead extra |
3764 | * blocks from the inode table. | 3771 | * blocks from the inode table. |
3765 | */ | 3772 | */ |
3766 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | 3773 | if (EXT4_SB(sb)->s_inode_readahead_blks) { |
3767 | ext4_fsblk_t b, end, table; | 3774 | ext4_fsblk_t b, end, table; |
3768 | unsigned num; | 3775 | unsigned num; |
3769 | __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks; | 3776 | __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks; |
3770 | 3777 | ||
3771 | table = ext4_inode_table(sb, gdp); | 3778 | table = ext4_inode_table(sb, gdp); |
3772 | /* s_inode_readahead_blks is always a power of 2 */ | 3779 | /* s_inode_readahead_blks is always a power of 2 */ |
3773 | b = block & ~((ext4_fsblk_t) ra_blks - 1); | 3780 | b = block & ~((ext4_fsblk_t) ra_blks - 1); |
3774 | if (table > b) | 3781 | if (table > b) |
3775 | b = table; | 3782 | b = table; |
3776 | end = b + ra_blks; | 3783 | end = b + ra_blks; |
3777 | num = EXT4_INODES_PER_GROUP(sb); | 3784 | num = EXT4_INODES_PER_GROUP(sb); |
3778 | if (ext4_has_group_desc_csum(sb)) | 3785 | if (ext4_has_group_desc_csum(sb)) |
3779 | num -= ext4_itable_unused_count(sb, gdp); | 3786 | num -= ext4_itable_unused_count(sb, gdp); |
3780 | table += num / inodes_per_block; | 3787 | table += num / inodes_per_block; |
3781 | if (end > table) | 3788 | if (end > table) |
3782 | end = table; | 3789 | end = table; |
3783 | while (b <= end) | 3790 | while (b <= end) |
3784 | sb_breadahead(sb, b++); | 3791 | sb_breadahead(sb, b++); |
3785 | } | 3792 | } |
3786 | 3793 | ||
3787 | /* | 3794 | /* |
3788 | * There are other valid inodes in the buffer, this inode | 3795 | * There are other valid inodes in the buffer, this inode |
3789 | * has in-inode xattrs, or we don't have this inode in memory. | 3796 | * has in-inode xattrs, or we don't have this inode in memory. |
3790 | * Read the block from disk. | 3797 | * Read the block from disk. |
3791 | */ | 3798 | */ |
3792 | trace_ext4_load_inode(inode); | 3799 | trace_ext4_load_inode(inode); |
3793 | get_bh(bh); | 3800 | get_bh(bh); |
3794 | bh->b_end_io = end_buffer_read_sync; | 3801 | bh->b_end_io = end_buffer_read_sync; |
3795 | submit_bh(READ | REQ_META | REQ_PRIO, bh); | 3802 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
3796 | wait_on_buffer(bh); | 3803 | wait_on_buffer(bh); |
3797 | if (!buffer_uptodate(bh)) { | 3804 | if (!buffer_uptodate(bh)) { |
3798 | EXT4_ERROR_INODE_BLOCK(inode, block, | 3805 | EXT4_ERROR_INODE_BLOCK(inode, block, |
3799 | "unable to read itable block"); | 3806 | "unable to read itable block"); |
3800 | brelse(bh); | 3807 | brelse(bh); |
3801 | return -EIO; | 3808 | return -EIO; |
3802 | } | 3809 | } |
3803 | } | 3810 | } |
3804 | has_buffer: | 3811 | has_buffer: |
3805 | iloc->bh = bh; | 3812 | iloc->bh = bh; |
3806 | return 0; | 3813 | return 0; |
3807 | } | 3814 | } |
3808 | 3815 | ||
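To make the location arithmetic in __ext4_get_inode_loc() concrete, here is a minimal userspace sketch of the same calculation. All geometry values (inodes per group, inode size, block size, and the inode table's starting block) are assumed example numbers, not values read from a real superblock; the kernel obtains the per-group inode table start from the group descriptor instead.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* Assumed geometry -- illustrative only */
            const uint64_t inodes_per_group = 8192;
            const uint64_t inode_size       = 256;
            const uint64_t block_size       = 4096;
            const uint64_t itable_start     = 1057;   /* hypothetical block */
            const uint64_t ino              = 12345;  /* inode numbers are 1-based */

            uint64_t inodes_per_block = block_size / inode_size;      /* 16 */
            uint64_t block_group  = (ino - 1) / inodes_per_group;     /* 1 */
            uint64_t inode_offset = (ino - 1) % inodes_per_group;     /* 4152 */
            uint64_t block  = itable_start + inode_offset / inodes_per_block;
            uint64_t offset = (inode_offset % inodes_per_block) * inode_size;

            /* Prints: group 1, itable block 1316, byte offset 2048 */
            printf("group %llu, itable block %llu, byte offset %llu\n",
                   (unsigned long long)block_group,
                   (unsigned long long)block,
                   (unsigned long long)offset);
            return 0;
    }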
3809 | int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | 3816 | int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) |
3810 | { | 3817 | { |
3811 | /* We have all inode data except xattrs in memory here. */ | 3818 | /* We have all inode data except xattrs in memory here. */ |
3812 | return __ext4_get_inode_loc(inode, iloc, | 3819 | return __ext4_get_inode_loc(inode, iloc, |
3813 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); | 3820 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); |
3814 | } | 3821 | } |
3815 | 3822 | ||
3816 | void ext4_set_inode_flags(struct inode *inode) | 3823 | void ext4_set_inode_flags(struct inode *inode) |
3817 | { | 3824 | { |
3818 | unsigned int flags = EXT4_I(inode)->i_flags; | 3825 | unsigned int flags = EXT4_I(inode)->i_flags; |
3819 | unsigned int new_fl = 0; | 3826 | unsigned int new_fl = 0; |
3820 | 3827 | ||
3821 | if (flags & EXT4_SYNC_FL) | 3828 | if (flags & EXT4_SYNC_FL) |
3822 | new_fl |= S_SYNC; | 3829 | new_fl |= S_SYNC; |
3823 | if (flags & EXT4_APPEND_FL) | 3830 | if (flags & EXT4_APPEND_FL) |
3824 | new_fl |= S_APPEND; | 3831 | new_fl |= S_APPEND; |
3825 | if (flags & EXT4_IMMUTABLE_FL) | 3832 | if (flags & EXT4_IMMUTABLE_FL) |
3826 | new_fl |= S_IMMUTABLE; | 3833 | new_fl |= S_IMMUTABLE; |
3827 | if (flags & EXT4_NOATIME_FL) | 3834 | if (flags & EXT4_NOATIME_FL) |
3828 | new_fl |= S_NOATIME; | 3835 | new_fl |= S_NOATIME; |
3829 | if (flags & EXT4_DIRSYNC_FL) | 3836 | if (flags & EXT4_DIRSYNC_FL) |
3830 | new_fl |= S_DIRSYNC; | 3837 | new_fl |= S_DIRSYNC; |
3831 | if (test_opt(inode->i_sb, DAX)) | 3838 | if (test_opt(inode->i_sb, DAX)) |
3832 | new_fl |= S_DAX; | 3839 | new_fl |= S_DAX; |
3833 | inode_set_flags(inode, new_fl, | 3840 | inode_set_flags(inode, new_fl, |
3834 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); | 3841 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); |
3835 | } | 3842 | } |
3836 | 3843 | ||
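The if-chain in ext4_set_inode_flags() is a fixed translation from on-disk flag bits to VFS i_flags bits. The same mapping can be written table-driven; a sketch, using made-up bit values rather than the real EXT4_*_FL and S_* constants:

    #include <stdio.h>

    /* Placeholder bit values -- NOT the kernel's constants */
    #define X_SYNC_FL      0x01
    #define X_APPEND_FL    0x02
    #define X_IMMUTABLE_FL 0x04
    #define V_SYNC         0x10
    #define V_APPEND       0x20
    #define V_IMMUTABLE    0x40

    static const struct { unsigned ondisk, vfs; } fl_map[] = {
            { X_SYNC_FL,      V_SYNC      },
            { X_APPEND_FL,    V_APPEND    },
            { X_IMMUTABLE_FL, V_IMMUTABLE },
    };

    static unsigned translate_flags(unsigned ondisk_flags)
    {
            unsigned new_fl = 0;
            for (size_t i = 0; i < sizeof(fl_map) / sizeof(fl_map[0]); i++)
                    if (ondisk_flags & fl_map[i].ondisk)
                            new_fl |= fl_map[i].vfs;
            return new_fl;
    }

    int main(void)
    {
            /* SYNC + IMMUTABLE in, 0x50 out */
            printf("0x%x\n", translate_flags(X_SYNC_FL | X_IMMUTABLE_FL));
            return 0;
    }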
3837 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ | 3844 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ |
3838 | void ext4_get_inode_flags(struct ext4_inode_info *ei) | 3845 | void ext4_get_inode_flags(struct ext4_inode_info *ei) |
3839 | { | 3846 | { |
3840 | unsigned int vfs_fl; | 3847 | unsigned int vfs_fl; |
3841 | unsigned long old_fl, new_fl; | 3848 | unsigned long old_fl, new_fl; |
3842 | 3849 | ||
3843 | do { | 3850 | do { |
3844 | vfs_fl = ei->vfs_inode.i_flags; | 3851 | vfs_fl = ei->vfs_inode.i_flags; |
3845 | old_fl = ei->i_flags; | 3852 | old_fl = ei->i_flags; |
3846 | new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| | 3853 | new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| |
3847 | EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| | 3854 | EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| |
3848 | EXT4_DIRSYNC_FL); | 3855 | EXT4_DIRSYNC_FL); |
3849 | if (vfs_fl & S_SYNC) | 3856 | if (vfs_fl & S_SYNC) |
3850 | new_fl |= EXT4_SYNC_FL; | 3857 | new_fl |= EXT4_SYNC_FL; |
3851 | if (vfs_fl & S_APPEND) | 3858 | if (vfs_fl & S_APPEND) |
3852 | new_fl |= EXT4_APPEND_FL; | 3859 | new_fl |= EXT4_APPEND_FL; |
3853 | if (vfs_fl & S_IMMUTABLE) | 3860 | if (vfs_fl & S_IMMUTABLE) |
3854 | new_fl |= EXT4_IMMUTABLE_FL; | 3861 | new_fl |= EXT4_IMMUTABLE_FL; |
3855 | if (vfs_fl & S_NOATIME) | 3862 | if (vfs_fl & S_NOATIME) |
3856 | new_fl |= EXT4_NOATIME_FL; | 3863 | new_fl |= EXT4_NOATIME_FL; |
3857 | if (vfs_fl & S_DIRSYNC) | 3864 | if (vfs_fl & S_DIRSYNC) |
3858 | new_fl |= EXT4_DIRSYNC_FL; | 3865 | new_fl |= EXT4_DIRSYNC_FL; |
3859 | } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); | 3866 | } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); |
3860 | } | 3867 | } |
3861 | 3868 | ||
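The do/while around cmpxchg() in ext4_get_inode_flags() is the standard lock-free read-modify-write loop: snapshot the word, derive the new value from the snapshot, and publish it only if nobody raced in between, retrying otherwise. A minimal userspace analogue using C11 atomics (the flag bit is a placeholder):

    #include <stdatomic.h>
    #include <stdio.h>

    #define FL_SYNC 0x1UL                  /* placeholder flag bit */

    static _Atomic unsigned long i_flags;

    static void update_flags(unsigned long vfs_fl)
    {
            unsigned long old_fl, new_fl;

            old_fl = atomic_load(&i_flags);
            do {
                    /* Recompute from the latest snapshot on every retry */
                    new_fl = old_fl & ~FL_SYNC;
                    if (vfs_fl & FL_SYNC)
                            new_fl |= FL_SYNC;
                    /* On failure, old_fl is refreshed with the current value */
            } while (!atomic_compare_exchange_weak(&i_flags, &old_fl, new_fl));
    }

    int main(void)
    {
            atomic_store(&i_flags, 0);
            update_flags(FL_SYNC);
            printf("0x%lx\n", atomic_load(&i_flags));   /* prints 0x1 */
            return 0;
    }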
3862 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | 3869 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, |
3863 | struct ext4_inode_info *ei) | 3870 | struct ext4_inode_info *ei) |
3864 | { | 3871 | { |
3865 | blkcnt_t i_blocks; | 3872 | blkcnt_t i_blocks; |
3866 | struct inode *inode = &(ei->vfs_inode); | 3873 | struct inode *inode = &(ei->vfs_inode); |
3867 | struct super_block *sb = inode->i_sb; | 3874 | struct super_block *sb = inode->i_sb; |
3868 | 3875 | ||
3869 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3876 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3870 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 3877 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { |
3871 | /* we are using combined 48 bit field */ | 3878 | /* we are using combined 48 bit field */ |
3872 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | 3879 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | |
3873 | le32_to_cpu(raw_inode->i_blocks_lo); | 3880 | le32_to_cpu(raw_inode->i_blocks_lo); |
3874 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { | 3881 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { |
3875 | /* i_blocks is stored in units of the fs block size */ | 3882 | /* i_blocks is stored in units of the fs block size */ |
3876 | return i_blocks << (inode->i_blkbits - 9); | 3883 | return i_blocks << (inode->i_blkbits - 9); |
3877 | } else { | 3884 | } else { |
3878 | return i_blocks; | 3885 | return i_blocks; |
3879 | } | 3886 | } |
3880 | } else { | 3887 | } else { |
3881 | return le32_to_cpu(raw_inode->i_blocks_lo); | 3888 | return le32_to_cpu(raw_inode->i_blocks_lo); |
3882 | } | 3889 | } |
3883 | } | 3890 | } |
3884 | 3891 | ||
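A worked decode of what ext4_inode_blocks() computes: with the huge_file feature, i_blocks_high contributes bits 32..47 of the count, and if EXT4_INODE_HUGE_FILE is also set the count is in filesystem blocks rather than 512-byte sectors, hence the shift by (i_blkbits - 9). A sketch with assumed raw field values:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint16_t i_blocks_high = 0x0002;     /* assumed on-disk fields */
            uint32_t i_blocks_lo   = 0x00000010;
            unsigned blkbits       = 12;         /* 4K filesystem blocks */
            int huge_file_inode    = 1;          /* EXT4_INODE_HUGE_FILE set */

            uint64_t i_blocks = ((uint64_t)i_blocks_high << 32) | i_blocks_lo;

            if (huge_file_inode)
                    /* count is in fs blocks; convert to 512-byte sectors */
                    i_blocks <<= (blkbits - 9);

            /* 0x200000010 << 3 = 68719476864 sectors */
            printf("%llu sectors\n", (unsigned long long)i_blocks);
            return 0;
    }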
3885 | static inline void ext4_iget_extra_inode(struct inode *inode, | 3892 | static inline void ext4_iget_extra_inode(struct inode *inode, |
3886 | struct ext4_inode *raw_inode, | 3893 | struct ext4_inode *raw_inode, |
3887 | struct ext4_inode_info *ei) | 3894 | struct ext4_inode_info *ei) |
3888 | { | 3895 | { |
3889 | __le32 *magic = (void *)raw_inode + | 3896 | __le32 *magic = (void *)raw_inode + |
3890 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; | 3897 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; |
3891 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { | 3898 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { |
3892 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | 3899 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
3893 | ext4_find_inline_data_nolock(inode); | 3900 | ext4_find_inline_data_nolock(inode); |
3894 | } else | 3901 | } else |
3895 | EXT4_I(inode)->i_inline_off = 0; | 3902 | EXT4_I(inode)->i_inline_off = 0; |
3896 | } | 3903 | } |
3897 | 3904 | ||
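ext4_iget_extra_inode() probes for in-inode extended attributes by looking at the four bytes that follow the classic 128-byte inode body plus i_extra_isize; if they hold the xattr magic, the inode carries in-body xattrs (and possibly inline data). A sketch of the offset check, assuming a 256-byte inode and an i_extra_isize of 32, and ignoring the little-endian conversion the kernel performs:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define GOOD_OLD_INODE_SIZE 128        /* classic ext2 inode body */
    #define XATTR_MAGIC 0xEA020000u        /* in-inode xattr marker */

    int main(void)
    {
            unsigned char raw_inode[256] = {0};   /* one on-disk inode */
            uint16_t i_extra_isize = 32;          /* assumed */
            uint32_t magic = XATTR_MAGIC, probe;

            /* Plant the magic where in-inode xattrs would begin ... */
            memcpy(raw_inode + GOOD_OLD_INODE_SIZE + i_extra_isize,
                   &magic, sizeof(magic));

            /* ... and probe for it the way the kernel does */
            memcpy(&probe, raw_inode + GOOD_OLD_INODE_SIZE + i_extra_isize,
                   sizeof(probe));
            printf("has in-inode xattrs: %s\n",
                   probe == XATTR_MAGIC ? "yes" : "no");
            return 0;
    }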
3898 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | 3905 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) |
3899 | { | 3906 | { |
3900 | struct ext4_iloc iloc; | 3907 | struct ext4_iloc iloc; |
3901 | struct ext4_inode *raw_inode; | 3908 | struct ext4_inode *raw_inode; |
3902 | struct ext4_inode_info *ei; | 3909 | struct ext4_inode_info *ei; |
3903 | struct inode *inode; | 3910 | struct inode *inode; |
3904 | journal_t *journal = EXT4_SB(sb)->s_journal; | 3911 | journal_t *journal = EXT4_SB(sb)->s_journal; |
3905 | long ret; | 3912 | long ret; |
3906 | int block; | 3913 | int block; |
3907 | uid_t i_uid; | 3914 | uid_t i_uid; |
3908 | gid_t i_gid; | 3915 | gid_t i_gid; |
3909 | 3916 | ||
3910 | inode = iget_locked(sb, ino); | 3917 | inode = iget_locked(sb, ino); |
3911 | if (!inode) | 3918 | if (!inode) |
3912 | return ERR_PTR(-ENOMEM); | 3919 | return ERR_PTR(-ENOMEM); |
3913 | if (!(inode->i_state & I_NEW)) | 3920 | if (!(inode->i_state & I_NEW)) |
3914 | return inode; | 3921 | return inode; |
3915 | 3922 | ||
3916 | ei = EXT4_I(inode); | 3923 | ei = EXT4_I(inode); |
3917 | iloc.bh = NULL; | 3924 | iloc.bh = NULL; |
3918 | 3925 | ||
3919 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 3926 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
3920 | if (ret < 0) | 3927 | if (ret < 0) |
3921 | goto bad_inode; | 3928 | goto bad_inode; |
3922 | raw_inode = ext4_raw_inode(&iloc); | 3929 | raw_inode = ext4_raw_inode(&iloc); |
3923 | 3930 | ||
3924 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 3931 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
3925 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 3932 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
3926 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 3933 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
3927 | EXT4_INODE_SIZE(inode->i_sb)) { | 3934 | EXT4_INODE_SIZE(inode->i_sb)) { |
3928 | EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", | 3935 | EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", |
3929 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, | 3936 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, |
3930 | EXT4_INODE_SIZE(inode->i_sb)); | 3937 | EXT4_INODE_SIZE(inode->i_sb)); |
3931 | ret = -EIO; | 3938 | ret = -EIO; |
3932 | goto bad_inode; | 3939 | goto bad_inode; |
3933 | } | 3940 | } |
3934 | } else | 3941 | } else |
3935 | ei->i_extra_isize = 0; | 3942 | ei->i_extra_isize = 0; |
3936 | 3943 | ||
3937 | /* Precompute checksum seed for inode metadata */ | 3944 | /* Precompute checksum seed for inode metadata */ |
3938 | if (ext4_has_metadata_csum(sb)) { | 3945 | if (ext4_has_metadata_csum(sb)) { |
3939 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 3946 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
3940 | __u32 csum; | 3947 | __u32 csum; |
3941 | __le32 inum = cpu_to_le32(inode->i_ino); | 3948 | __le32 inum = cpu_to_le32(inode->i_ino); |
3942 | __le32 gen = raw_inode->i_generation; | 3949 | __le32 gen = raw_inode->i_generation; |
3943 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, | 3950 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, |
3944 | sizeof(inum)); | 3951 | sizeof(inum)); |
3945 | ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, | 3952 | ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, |
3946 | sizeof(gen)); | 3953 | sizeof(gen)); |
3947 | } | 3954 | } |
3948 | 3955 | ||
3949 | if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { | 3956 | if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { |
3950 | EXT4_ERROR_INODE(inode, "checksum invalid"); | 3957 | EXT4_ERROR_INODE(inode, "checksum invalid"); |
3951 | ret = -EIO; | 3958 | ret = -EIO; |
3952 | goto bad_inode; | 3959 | goto bad_inode; |
3953 | } | 3960 | } |
3954 | 3961 | ||
3955 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 3962 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
3956 | i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 3963 | i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
3957 | i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 3964 | i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
3958 | if (!(test_opt(inode->i_sb, NO_UID32))) { | 3965 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
3959 | i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 3966 | i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3960 | i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 3967 | i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3961 | } | 3968 | } |
3962 | i_uid_write(inode, i_uid); | 3969 | i_uid_write(inode, i_uid); |
3963 | i_gid_write(inode, i_gid); | 3970 | i_gid_write(inode, i_gid); |
3964 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); | 3971 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
3965 | 3972 | ||
3966 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ | 3973 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
3967 | ei->i_inline_off = 0; | 3974 | ei->i_inline_off = 0; |
3968 | ei->i_dir_start_lookup = 0; | 3975 | ei->i_dir_start_lookup = 0; |
3969 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 3976 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
3970 | /* We now have enough fields to check if the inode was active or not. | 3977 | /* We now have enough fields to check if the inode was active or not. |
3971 | * This is needed because nfsd might try to access dead inodes; | 3978 | * This is needed because nfsd might try to access dead inodes; |
3972 | * the test is the same one that e2fsck uses. | 3979 | * the test is the same one that e2fsck uses. |
3973 | * NeilBrown 1999oct15 | 3980 | * NeilBrown 1999oct15 |
3974 | */ | 3981 | */ |
3975 | if (inode->i_nlink == 0) { | 3982 | if (inode->i_nlink == 0) { |
3976 | if ((inode->i_mode == 0 || | 3983 | if ((inode->i_mode == 0 || |
3977 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && | 3984 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && |
3978 | ino != EXT4_BOOT_LOADER_INO) { | 3985 | ino != EXT4_BOOT_LOADER_INO) { |
3979 | /* this inode is deleted */ | 3986 | /* this inode is deleted */ |
3980 | ret = -ESTALE; | 3987 | ret = -ESTALE; |
3981 | goto bad_inode; | 3988 | goto bad_inode; |
3982 | } | 3989 | } |
3983 | /* The only unlinked inodes we let through here have | 3990 | /* The only unlinked inodes we let through here have |
3984 | * valid i_mode and are being read by the orphan | 3991 | * valid i_mode and are being read by the orphan |
3985 | * recovery code: that's fine, we're about to complete | 3992 | * recovery code: that's fine, we're about to complete |
3986 | * the process of deleting those. | 3993 | * the process of deleting those. |
3987 | * OR it is the EXT4_BOOT_LOADER_INO which is | 3994 | * OR it is the EXT4_BOOT_LOADER_INO which is |
3988 | * not initialized on a new filesystem. */ | 3995 | * not initialized on a new filesystem. */ |
3989 | } | 3996 | } |
3990 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); | 3997 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); |
3991 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); | 3998 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); |
3992 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); | 3999 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); |
3993 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) | 4000 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) |
3994 | ei->i_file_acl |= | 4001 | ei->i_file_acl |= |
3995 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; | 4002 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; |
3996 | inode->i_size = ext4_isize(raw_inode); | 4003 | inode->i_size = ext4_isize(raw_inode); |
3997 | ei->i_disksize = inode->i_size; | 4004 | ei->i_disksize = inode->i_size; |
3998 | #ifdef CONFIG_QUOTA | 4005 | #ifdef CONFIG_QUOTA |
3999 | ei->i_reserved_quota = 0; | 4006 | ei->i_reserved_quota = 0; |
4000 | #endif | 4007 | #endif |
4001 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 4008 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
4002 | ei->i_block_group = iloc.block_group; | 4009 | ei->i_block_group = iloc.block_group; |
4003 | ei->i_last_alloc_group = ~0; | 4010 | ei->i_last_alloc_group = ~0; |
4004 | /* | 4011 | /* |
4005 | * NOTE! The in-memory inode i_data array is in little-endian order | 4012 | * NOTE! The in-memory inode i_data array is in little-endian order |
4006 | * even on big-endian machines: we do NOT byteswap the block numbers! | 4013 | * even on big-endian machines: we do NOT byteswap the block numbers! |
4007 | */ | 4014 | */ |
4008 | for (block = 0; block < EXT4_N_BLOCKS; block++) | 4015 | for (block = 0; block < EXT4_N_BLOCKS; block++) |
4009 | ei->i_data[block] = raw_inode->i_block[block]; | 4016 | ei->i_data[block] = raw_inode->i_block[block]; |
4010 | INIT_LIST_HEAD(&ei->i_orphan); | 4017 | INIT_LIST_HEAD(&ei->i_orphan); |
4011 | 4018 | ||
4012 | /* | 4019 | /* |
4013 | * Set the transaction IDs of the transactions that have to be committed | 4020 | * Set the transaction IDs of the transactions that have to be committed |
4014 | * to finish f[data]sync. We set them to the currently running transaction | 4021 | * to finish f[data]sync. We set them to the currently running transaction |
4015 | * as we cannot be sure that the inode or some of its metadata isn't | 4022 | * as we cannot be sure that the inode or some of its metadata isn't |
4016 | * part of the transaction - the inode could have been reclaimed and | 4023 | * part of the transaction - the inode could have been reclaimed and |
4017 | * now it is reread from disk. | 4024 | * now it is reread from disk. |
4018 | */ | 4025 | */ |
4019 | if (journal) { | 4026 | if (journal) { |
4020 | transaction_t *transaction; | 4027 | transaction_t *transaction; |
4021 | tid_t tid; | 4028 | tid_t tid; |
4022 | 4029 | ||
4023 | read_lock(&journal->j_state_lock); | 4030 | read_lock(&journal->j_state_lock); |
4024 | if (journal->j_running_transaction) | 4031 | if (journal->j_running_transaction) |
4025 | transaction = journal->j_running_transaction; | 4032 | transaction = journal->j_running_transaction; |
4026 | else | 4033 | else |
4027 | transaction = journal->j_committing_transaction; | 4034 | transaction = journal->j_committing_transaction; |
4028 | if (transaction) | 4035 | if (transaction) |
4029 | tid = transaction->t_tid; | 4036 | tid = transaction->t_tid; |
4030 | else | 4037 | else |
4031 | tid = journal->j_commit_sequence; | 4038 | tid = journal->j_commit_sequence; |
4032 | read_unlock(&journal->j_state_lock); | 4039 | read_unlock(&journal->j_state_lock); |
4033 | ei->i_sync_tid = tid; | 4040 | ei->i_sync_tid = tid; |
4034 | ei->i_datasync_tid = tid; | 4041 | ei->i_datasync_tid = tid; |
4035 | } | 4042 | } |
4036 | 4043 | ||
4037 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 4044 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
4038 | if (ei->i_extra_isize == 0) { | 4045 | if (ei->i_extra_isize == 0) { |
4039 | /* The extra space is currently unused. Use it. */ | 4046 | /* The extra space is currently unused. Use it. */ |
4040 | ei->i_extra_isize = sizeof(struct ext4_inode) - | 4047 | ei->i_extra_isize = sizeof(struct ext4_inode) - |
4041 | EXT4_GOOD_OLD_INODE_SIZE; | 4048 | EXT4_GOOD_OLD_INODE_SIZE; |
4042 | } else { | 4049 | } else { |
4043 | ext4_iget_extra_inode(inode, raw_inode, ei); | 4050 | ext4_iget_extra_inode(inode, raw_inode, ei); |
4044 | } | 4051 | } |
4045 | } | 4052 | } |
4046 | 4053 | ||
4047 | EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); | 4054 | EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); |
4048 | EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); | 4055 | EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); |
4049 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); | 4056 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); |
4050 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); | 4057 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); |
4051 | 4058 | ||
4052 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { | 4059 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { |
4053 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); | 4060 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); |
4054 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 4061 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
4055 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | 4062 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) |
4056 | inode->i_version |= | 4063 | inode->i_version |= |
4057 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | 4064 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; |
4058 | } | 4065 | } |
4059 | } | 4066 | } |
4060 | 4067 | ||
4061 | ret = 0; | 4068 | ret = 0; |
4062 | if (ei->i_file_acl && | 4069 | if (ei->i_file_acl && |
4063 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 4070 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
4064 | EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", | 4071 | EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", |
4065 | ei->i_file_acl); | 4072 | ei->i_file_acl); |
4066 | ret = -EIO; | 4073 | ret = -EIO; |
4067 | goto bad_inode; | 4074 | goto bad_inode; |
4068 | } else if (!ext4_has_inline_data(inode)) { | 4075 | } else if (!ext4_has_inline_data(inode)) { |
4069 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4076 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4070 | if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4077 | if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
4071 | (S_ISLNK(inode->i_mode) && | 4078 | (S_ISLNK(inode->i_mode) && |
4072 | !ext4_inode_is_fast_symlink(inode)))) | 4079 | !ext4_inode_is_fast_symlink(inode)))) |
4073 | /* Validate extent which is part of inode */ | 4080 | /* Validate extent which is part of inode */ |
4074 | ret = ext4_ext_check_inode(inode); | 4081 | ret = ext4_ext_check_inode(inode); |
4075 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4082 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
4076 | (S_ISLNK(inode->i_mode) && | 4083 | (S_ISLNK(inode->i_mode) && |
4077 | !ext4_inode_is_fast_symlink(inode))) { | 4084 | !ext4_inode_is_fast_symlink(inode))) { |
4078 | /* Validate block references which are part of inode */ | 4085 | /* Validate block references which are part of inode */ |
4079 | ret = ext4_ind_check_inode(inode); | 4086 | ret = ext4_ind_check_inode(inode); |
4080 | } | 4087 | } |
4081 | } | 4088 | } |
4082 | if (ret) | 4089 | if (ret) |
4083 | goto bad_inode; | 4090 | goto bad_inode; |
4084 | 4091 | ||
4085 | if (S_ISREG(inode->i_mode)) { | 4092 | if (S_ISREG(inode->i_mode)) { |
4086 | inode->i_op = &ext4_file_inode_operations; | 4093 | inode->i_op = &ext4_file_inode_operations; |
4087 | if (test_opt(inode->i_sb, DAX)) | 4094 | if (test_opt(inode->i_sb, DAX)) |
4088 | inode->i_fop = &ext4_dax_file_operations; | 4095 | inode->i_fop = &ext4_dax_file_operations; |
4089 | else | 4096 | else |
4090 | inode->i_fop = &ext4_file_operations; | 4097 | inode->i_fop = &ext4_file_operations; |
4091 | ext4_set_aops(inode); | 4098 | ext4_set_aops(inode); |
4092 | } else if (S_ISDIR(inode->i_mode)) { | 4099 | } else if (S_ISDIR(inode->i_mode)) { |
4093 | inode->i_op = &ext4_dir_inode_operations; | 4100 | inode->i_op = &ext4_dir_inode_operations; |
4094 | inode->i_fop = &ext4_dir_operations; | 4101 | inode->i_fop = &ext4_dir_operations; |
4095 | } else if (S_ISLNK(inode->i_mode)) { | 4102 | } else if (S_ISLNK(inode->i_mode)) { |
4096 | if (ext4_inode_is_fast_symlink(inode)) { | 4103 | if (ext4_inode_is_fast_symlink(inode)) { |
4097 | inode->i_op = &ext4_fast_symlink_inode_operations; | 4104 | inode->i_op = &ext4_fast_symlink_inode_operations; |
4098 | nd_terminate_link(ei->i_data, inode->i_size, | 4105 | nd_terminate_link(ei->i_data, inode->i_size, |
4099 | sizeof(ei->i_data) - 1); | 4106 | sizeof(ei->i_data) - 1); |
4100 | } else { | 4107 | } else { |
4101 | inode->i_op = &ext4_symlink_inode_operations; | 4108 | inode->i_op = &ext4_symlink_inode_operations; |
4102 | ext4_set_aops(inode); | 4109 | ext4_set_aops(inode); |
4103 | } | 4110 | } |
4104 | } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || | 4111 | } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || |
4105 | S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { | 4112 | S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { |
4106 | inode->i_op = &ext4_special_inode_operations; | 4113 | inode->i_op = &ext4_special_inode_operations; |
4107 | if (raw_inode->i_block[0]) | 4114 | if (raw_inode->i_block[0]) |
4108 | init_special_inode(inode, inode->i_mode, | 4115 | init_special_inode(inode, inode->i_mode, |
4109 | old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); | 4116 | old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); |
4110 | else | 4117 | else |
4111 | init_special_inode(inode, inode->i_mode, | 4118 | init_special_inode(inode, inode->i_mode, |
4112 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4119 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4113 | } else if (ino == EXT4_BOOT_LOADER_INO) { | 4120 | } else if (ino == EXT4_BOOT_LOADER_INO) { |
4114 | make_bad_inode(inode); | 4121 | make_bad_inode(inode); |
4115 | } else { | 4122 | } else { |
4116 | ret = -EIO; | 4123 | ret = -EIO; |
4117 | EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); | 4124 | EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); |
4118 | goto bad_inode; | 4125 | goto bad_inode; |
4119 | } | 4126 | } |
4120 | brelse(iloc.bh); | 4127 | brelse(iloc.bh); |
4121 | ext4_set_inode_flags(inode); | 4128 | ext4_set_inode_flags(inode); |
4122 | unlock_new_inode(inode); | 4129 | unlock_new_inode(inode); |
4123 | return inode; | 4130 | return inode; |
4124 | 4131 | ||
4125 | bad_inode: | 4132 | bad_inode: |
4126 | brelse(iloc.bh); | 4133 | brelse(iloc.bh); |
4127 | iget_failed(inode); | 4134 | iget_failed(inode); |
4128 | return ERR_PTR(ret); | 4135 | return ERR_PTR(ret); |
4129 | } | 4136 | } |
4130 | 4137 | ||
4131 | struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) | 4138 | struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) |
4132 | { | 4139 | { |
4133 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) | 4140 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) |
4134 | return ERR_PTR(-EIO); | 4141 | return ERR_PTR(-EIO); |
4135 | return ext4_iget(sb, ino); | 4142 | return ext4_iget(sb, ino); |
4136 | } | 4143 | } |
4137 | 4144 | ||
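ext4_iget() reports failure through the kernel's error-pointer convention: a small negative errno is encoded into the returned pointer itself, which is why ext4_iget_normal() can hand back either a valid inode or ERR_PTR(-EIO) through one return type. A userspace sketch of that convention (MAX_ERRNO mirrors the kernel's 4095 cutoff):

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095

    static void *ERR_PTR(long error)      { return (void *)error; }
    static long  PTR_ERR(const void *ptr) { return (long)ptr; }
    static int   IS_ERR(const void *ptr)
    {
            /* The top MAX_ERRNO addresses are reserved for errno values */
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
            void *inode = ERR_PTR(-EIO);   /* what a failed lookup returns */

            if (IS_ERR(inode))
                    printf("lookup failed: errno %ld\n", -PTR_ERR(inode));
            return 0;
    }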
4138 | static int ext4_inode_blocks_set(handle_t *handle, | 4145 | static int ext4_inode_blocks_set(handle_t *handle, |
4139 | struct ext4_inode *raw_inode, | 4146 | struct ext4_inode *raw_inode, |
4140 | struct ext4_inode_info *ei) | 4147 | struct ext4_inode_info *ei) |
4141 | { | 4148 | { |
4142 | struct inode *inode = &(ei->vfs_inode); | 4149 | struct inode *inode = &(ei->vfs_inode); |
4143 | u64 i_blocks = inode->i_blocks; | 4150 | u64 i_blocks = inode->i_blocks; |
4144 | struct super_block *sb = inode->i_sb; | 4151 | struct super_block *sb = inode->i_sb; |
4145 | 4152 | ||
4146 | if (i_blocks <= ~0U) { | 4153 | if (i_blocks <= ~0U) { |
4147 | /* | 4154 | /* |
4148 | * i_blocks can be represented in a 32 bit variable | 4155 | * i_blocks can be represented in a 32 bit variable |
4149 | * as multiple of 512 bytes | 4156 | * as multiple of 512 bytes |
4150 | */ | 4157 | */ |
4151 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4158 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4152 | raw_inode->i_blocks_high = 0; | 4159 | raw_inode->i_blocks_high = 0; |
4153 | ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); | 4160 | ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); |
4154 | return 0; | 4161 | return 0; |
4155 | } | 4162 | } |
4156 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) | 4163 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) |
4157 | return -EFBIG; | 4164 | return -EFBIG; |
4158 | 4165 | ||
4159 | if (i_blocks <= 0xffffffffffffULL) { | 4166 | if (i_blocks <= 0xffffffffffffULL) { |
4160 | /* | 4167 | /* |
4161 | * i_blocks can be represented in a 48 bit variable | 4168 | * i_blocks can be represented in a 48 bit variable |
4162 | * as multiple of 512 bytes | 4169 | * as multiple of 512 bytes |
4163 | */ | 4170 | */ |
4164 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4171 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4165 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4172 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
4166 | ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); | 4173 | ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); |
4167 | } else { | 4174 | } else { |
4168 | ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); | 4175 | ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); |
4169 | /* i_blocks is stored in units of the fs block size */ | 4176 | /* i_blocks is stored in units of the fs block size */ |
4170 | i_blocks = i_blocks >> (inode->i_blkbits - 9); | 4177 | i_blocks = i_blocks >> (inode->i_blkbits - 9); |
4171 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4178 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4172 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4179 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
4173 | } | 4180 | } |
4174 | return 0; | 4181 | return 0; |
4175 | } | 4182 | } |
4176 | 4183 | ||
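ext4_inode_blocks_set() picks one of three on-disk representations in order: a 32-bit sector count, a 48-bit sector count (requires the huge_file feature, else -EFBIG), or a 48-bit count of filesystem blocks with EXT4_INODE_HUGE_FILE set. A standalone sketch of that decision ladder:

    #include <stdio.h>
    #include <stdint.h>

    /* Returns 0 and fills hi/lo/huge_flag, or -1 to mimic -EFBIG. */
    static int encode_blocks(uint64_t i_blocks, unsigned blkbits,
                             int huge_file_feature,
                             uint16_t *hi, uint32_t *lo, int *huge_flag)
    {
            *huge_flag = 0;
            if (i_blocks <= 0xffffffffULL) {         /* fits in 32 bits */
                    *lo = (uint32_t)i_blocks;
                    *hi = 0;
                    return 0;
            }
            if (!huge_file_feature)
                    return -1;                       /* would be -EFBIG */
            if (i_blocks > 0xffffffffffffULL) {      /* exceeds 48 bits */
                    *huge_flag = 1;                  /* store fs blocks */
                    i_blocks >>= (blkbits - 9);
            }
            *lo = (uint32_t)i_blocks;
            *hi = (uint16_t)(i_blocks >> 32);
            return 0;
    }

    int main(void)
    {
            uint16_t hi; uint32_t lo; int huge;

            if (encode_blocks(0x123456789abULL, 12, 1, &hi, &lo, &huge) == 0)
                    /* prints: hi=0x123 lo=0x456789ab huge=0 */
                    printf("hi=%#x lo=%#x huge=%d\n",
                           (unsigned)hi, (unsigned)lo, huge);
            return 0;
    }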
4177 | struct other_inode { | 4184 | struct other_inode { |
4178 | unsigned long orig_ino; | 4185 | unsigned long orig_ino; |
4179 | struct ext4_inode *raw_inode; | 4186 | struct ext4_inode *raw_inode; |
4180 | }; | 4187 | }; |
4181 | 4188 | ||
4182 | static int other_inode_match(struct inode * inode, unsigned long ino, | 4189 | static int other_inode_match(struct inode * inode, unsigned long ino, |
4183 | void *data) | 4190 | void *data) |
4184 | { | 4191 | { |
4185 | struct other_inode *oi = (struct other_inode *) data; | 4192 | struct other_inode *oi = (struct other_inode *) data; |
4186 | 4193 | ||
4187 | if ((inode->i_ino != ino) || | 4194 | if ((inode->i_ino != ino) || |
4188 | (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | | 4195 | (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | |
4189 | I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || | 4196 | I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || |
4190 | ((inode->i_state & I_DIRTY_TIME) == 0)) | 4197 | ((inode->i_state & I_DIRTY_TIME) == 0)) |
4191 | return 0; | 4198 | return 0; |
4192 | spin_lock(&inode->i_lock); | 4199 | spin_lock(&inode->i_lock); |
4193 | if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | | 4200 | if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | |
4194 | I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && | 4201 | I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && |
4195 | (inode->i_state & I_DIRTY_TIME)) { | 4202 | (inode->i_state & I_DIRTY_TIME)) { |
4196 | struct ext4_inode_info *ei = EXT4_I(inode); | 4203 | struct ext4_inode_info *ei = EXT4_I(inode); |
4197 | 4204 | ||
4198 | inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); | 4205 | inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); |
4199 | spin_unlock(&inode->i_lock); | 4206 | spin_unlock(&inode->i_lock); |
4200 | 4207 | ||
4201 | spin_lock(&ei->i_raw_lock); | 4208 | spin_lock(&ei->i_raw_lock); |
4202 | EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); | 4209 | EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); |
4203 | EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); | 4210 | EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); |
4204 | EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); | 4211 | EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); |
4205 | ext4_inode_csum_set(inode, oi->raw_inode, ei); | 4212 | ext4_inode_csum_set(inode, oi->raw_inode, ei); |
4206 | spin_unlock(&ei->i_raw_lock); | 4213 | spin_unlock(&ei->i_raw_lock); |
4207 | trace_ext4_other_inode_update_time(inode, oi->orig_ino); | 4214 | trace_ext4_other_inode_update_time(inode, oi->orig_ino); |
4208 | return -1; | 4215 | return -1; |
4209 | } | 4216 | } |
4210 | spin_unlock(&inode->i_lock); | 4217 | spin_unlock(&inode->i_lock); |
4211 | return -1; | 4218 | return -1; |
4212 | } | 4219 | } |
4213 | 4220 | ||
4214 | /* | 4221 | /* |
4215 | * Opportunistically update the other time fields for other inodes in | 4222 | * Opportunistically update the other time fields for other inodes in |
4216 | * the same inode table block. | 4223 | * the same inode table block. |
4217 | */ | 4224 | */ |
4218 | static void ext4_update_other_inodes_time(struct super_block *sb, | 4225 | static void ext4_update_other_inodes_time(struct super_block *sb, |
4219 | unsigned long orig_ino, char *buf) | 4226 | unsigned long orig_ino, char *buf) |
4220 | { | 4227 | { |
4221 | struct other_inode oi; | 4228 | struct other_inode oi; |
4222 | unsigned long ino; | 4229 | unsigned long ino; |
4223 | int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; | 4230 | int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; |
4224 | int inode_size = EXT4_INODE_SIZE(sb); | 4231 | int inode_size = EXT4_INODE_SIZE(sb); |
4225 | 4232 | ||
4226 | oi.orig_ino = orig_ino; | 4233 | oi.orig_ino = orig_ino; |
4227 | ino = orig_ino & ~(inodes_per_block - 1); | 4234 | ino = orig_ino & ~(inodes_per_block - 1); |
4228 | for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { | 4235 | for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { |
4229 | if (ino == orig_ino) | 4236 | if (ino == orig_ino) |
4230 | continue; | 4237 | continue; |
4231 | oi.raw_inode = (struct ext4_inode *) buf; | 4238 | oi.raw_inode = (struct ext4_inode *) buf; |
4232 | (void) find_inode_nowait(sb, ino, other_inode_match, &oi); | 4239 | (void) find_inode_nowait(sb, ino, other_inode_match, &oi); |
4233 | } | 4240 | } |
4234 | } | 4241 | } |
4235 | 4242 | ||
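One subtlety in the walk above: inode numbers are 1-based, so masking orig_ino directly can land one slot off when identifying the first inode that shares the buffer; mainline later adjusted this to mask (orig_ino - 1) and add 1 back. A sketch of that corrected arithmetic, with assumed geometry (16 inodes per 4K block at 256 bytes each):

    #include <stdio.h>

    int main(void)
    {
            unsigned long inodes_per_block = 16;     /* assumed geometry */
            unsigned long orig_ino = 12345;

            /* First 1-based inode number stored in the same itable block */
            unsigned long first =
                    ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;

            for (unsigned long ino = first;
                 ino < first + inodes_per_block; ino++)
                    if (ino != orig_ino)
                            printf("would check sibling inode %lu\n", ino);
            return 0;
    }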
4236 | /* | 4243 | /* |
4237 | * Post the struct inode info into an on-disk inode location in the | 4244 | * Post the struct inode info into an on-disk inode location in the |
4238 | * buffer-cache. This gobbles the caller's reference to the | 4245 | * buffer-cache. This gobbles the caller's reference to the |
4239 | * buffer_head in the inode location struct. | 4246 | * buffer_head in the inode location struct. |
4240 | * | 4247 | * |
4241 | * The caller must have write access to iloc->bh. | 4248 | * The caller must have write access to iloc->bh. |
4242 | */ | 4249 | */ |
4243 | static int ext4_do_update_inode(handle_t *handle, | 4250 | static int ext4_do_update_inode(handle_t *handle, |
4244 | struct inode *inode, | 4251 | struct inode *inode, |
4245 | struct ext4_iloc *iloc) | 4252 | struct ext4_iloc *iloc) |
4246 | { | 4253 | { |
4247 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); | 4254 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); |
4248 | struct ext4_inode_info *ei = EXT4_I(inode); | 4255 | struct ext4_inode_info *ei = EXT4_I(inode); |
4249 | struct buffer_head *bh = iloc->bh; | 4256 | struct buffer_head *bh = iloc->bh; |
4250 | struct super_block *sb = inode->i_sb; | 4257 | struct super_block *sb = inode->i_sb; |
4251 | int err = 0, rc, block; | 4258 | int err = 0, rc, block; |
4252 | int need_datasync = 0, set_large_file = 0; | 4259 | int need_datasync = 0, set_large_file = 0; |
4253 | uid_t i_uid; | 4260 | uid_t i_uid; |
4254 | gid_t i_gid; | 4261 | gid_t i_gid; |
4255 | 4262 | ||
4256 | spin_lock(&ei->i_raw_lock); | 4263 | spin_lock(&ei->i_raw_lock); |
4257 | 4264 | ||
4258 | /* For fields not tracked in the in-memory inode, | 4265 | /* For fields not tracked in the in-memory inode, |
4259 | * initialise them to zero for new inodes. */ | 4266 | * initialise them to zero for new inodes. */ |
4260 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) | 4267 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) |
4261 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 4268 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); |
4262 | 4269 | ||
4263 | ext4_get_inode_flags(ei); | 4270 | ext4_get_inode_flags(ei); |
4264 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4271 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
4265 | i_uid = i_uid_read(inode); | 4272 | i_uid = i_uid_read(inode); |
4266 | i_gid = i_gid_read(inode); | 4273 | i_gid = i_gid_read(inode); |
4267 | if (!(test_opt(inode->i_sb, NO_UID32))) { | 4274 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
4268 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); | 4275 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); |
4269 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); | 4276 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); |
4270 | /* | 4277 | /* |
4271 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 4278 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
4272 | * re-used with the upper 16 bits of the uid/gid intact. | 4279 | * re-used with the upper 16 bits of the uid/gid intact. |
4273 | */ | 4280 | */ |
4274 | if (!ei->i_dtime) { | 4281 | if (!ei->i_dtime) { |
4275 | raw_inode->i_uid_high = | 4282 | raw_inode->i_uid_high = |
4276 | cpu_to_le16(high_16_bits(i_uid)); | 4283 | cpu_to_le16(high_16_bits(i_uid)); |
4277 | raw_inode->i_gid_high = | 4284 | raw_inode->i_gid_high = |
4278 | cpu_to_le16(high_16_bits(i_gid)); | 4285 | cpu_to_le16(high_16_bits(i_gid)); |
4279 | } else { | 4286 | } else { |
4280 | raw_inode->i_uid_high = 0; | 4287 | raw_inode->i_uid_high = 0; |
4281 | raw_inode->i_gid_high = 0; | 4288 | raw_inode->i_gid_high = 0; |
4282 | } | 4289 | } |
4283 | } else { | 4290 | } else { |
4284 | raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); | 4291 | raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); |
4285 | raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); | 4292 | raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); |
4286 | raw_inode->i_uid_high = 0; | 4293 | raw_inode->i_uid_high = 0; |
4287 | raw_inode->i_gid_high = 0; | 4294 | raw_inode->i_gid_high = 0; |
4288 | } | 4295 | } |
4289 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | 4296 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
4290 | 4297 | ||
4291 | EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); | 4298 | EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); |
4292 | EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); | 4299 | EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); |
4293 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); | 4300 | EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); |
4294 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); | 4301 | EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); |
4295 | 4302 | ||
4296 | err = ext4_inode_blocks_set(handle, raw_inode, ei); | 4303 | err = ext4_inode_blocks_set(handle, raw_inode, ei); |
4297 | if (err) { | 4304 | if (err) { |
4298 | spin_unlock(&ei->i_raw_lock); | 4305 | spin_unlock(&ei->i_raw_lock); |
4299 | goto out_brelse; | 4306 | goto out_brelse; |
4300 | } | 4307 | } |
4301 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4308 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
4302 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); | 4309 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); |
4303 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) | 4310 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) |
4304 | raw_inode->i_file_acl_high = | 4311 | raw_inode->i_file_acl_high = |
4305 | cpu_to_le16(ei->i_file_acl >> 32); | 4312 | cpu_to_le16(ei->i_file_acl >> 32); |
4306 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | 4313 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); |
4307 | if (ei->i_disksize != ext4_isize(raw_inode)) { | 4314 | if (ei->i_disksize != ext4_isize(raw_inode)) { |
4308 | ext4_isize_set(raw_inode, ei->i_disksize); | 4315 | ext4_isize_set(raw_inode, ei->i_disksize); |
4309 | need_datasync = 1; | 4316 | need_datasync = 1; |
4310 | } | 4317 | } |
4311 | if (ei->i_disksize > 0x7fffffffULL) { | 4318 | if (ei->i_disksize > 0x7fffffffULL) { |
4312 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4319 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, |
4313 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || | 4320 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || |
4314 | EXT4_SB(sb)->s_es->s_rev_level == | 4321 | EXT4_SB(sb)->s_es->s_rev_level == |
4315 | cpu_to_le32(EXT4_GOOD_OLD_REV)) | 4322 | cpu_to_le32(EXT4_GOOD_OLD_REV)) |
4316 | set_large_file = 1; | 4323 | set_large_file = 1; |
4317 | } | 4324 | } |
4318 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 4325 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
4319 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 4326 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
4320 | if (old_valid_dev(inode->i_rdev)) { | 4327 | if (old_valid_dev(inode->i_rdev)) { |
4321 | raw_inode->i_block[0] = | 4328 | raw_inode->i_block[0] = |
4322 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | 4329 | cpu_to_le32(old_encode_dev(inode->i_rdev)); |
4323 | raw_inode->i_block[1] = 0; | 4330 | raw_inode->i_block[1] = 0; |
4324 | } else { | 4331 | } else { |
4325 | raw_inode->i_block[0] = 0; | 4332 | raw_inode->i_block[0] = 0; |
4326 | raw_inode->i_block[1] = | 4333 | raw_inode->i_block[1] = |
4327 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | 4334 | cpu_to_le32(new_encode_dev(inode->i_rdev)); |
4328 | raw_inode->i_block[2] = 0; | 4335 | raw_inode->i_block[2] = 0; |
4329 | } | 4336 | } |
4330 | } else if (!ext4_has_inline_data(inode)) { | 4337 | } else if (!ext4_has_inline_data(inode)) { |
4331 | for (block = 0; block < EXT4_N_BLOCKS; block++) | 4338 | for (block = 0; block < EXT4_N_BLOCKS; block++) |
4332 | raw_inode->i_block[block] = ei->i_data[block]; | 4339 | raw_inode->i_block[block] = ei->i_data[block]; |
4333 | } | 4340 | } |
4334 | 4341 | ||
4335 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { | 4342 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { |
4336 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); | 4343 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
4337 | if (ei->i_extra_isize) { | 4344 | if (ei->i_extra_isize) { |
4338 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | 4345 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) |
4339 | raw_inode->i_version_hi = | 4346 | raw_inode->i_version_hi = |
4340 | cpu_to_le32(inode->i_version >> 32); | 4347 | cpu_to_le32(inode->i_version >> 32); |
4341 | raw_inode->i_extra_isize = | 4348 | raw_inode->i_extra_isize = |
4342 | cpu_to_le16(ei->i_extra_isize); | 4349 | cpu_to_le16(ei->i_extra_isize); |
4343 | } | 4350 | } |
4344 | } | 4351 | } |
4345 | ext4_inode_csum_set(inode, raw_inode, ei); | 4352 | ext4_inode_csum_set(inode, raw_inode, ei); |
4346 | spin_unlock(&ei->i_raw_lock); | 4353 | spin_unlock(&ei->i_raw_lock); |
4347 | if (inode->i_sb->s_flags & MS_LAZYTIME) | 4354 | if (inode->i_sb->s_flags & MS_LAZYTIME) |
4348 | ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, | 4355 | ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, |
4349 | bh->b_data); | 4356 | bh->b_data); |
4350 | 4357 | ||
4351 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4358 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4352 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); | 4359 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); |
4353 | if (!err) | 4360 | if (!err) |
4354 | err = rc; | 4361 | err = rc; |
4355 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); | 4362 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
4356 | if (set_large_file) { | 4363 | if (set_large_file) { |
4357 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); | 4364 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); |
4358 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 4365 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
4359 | if (err) | 4366 | if (err) |
4360 | goto out_brelse; | 4367 | goto out_brelse; |
4361 | ext4_update_dynamic_rev(sb); | 4368 | ext4_update_dynamic_rev(sb); |
4362 | EXT4_SET_RO_COMPAT_FEATURE(sb, | 4369 | EXT4_SET_RO_COMPAT_FEATURE(sb, |
4363 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 4370 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
4364 | ext4_handle_sync(handle); | 4371 | ext4_handle_sync(handle); |
4365 | err = ext4_handle_dirty_super(handle, sb); | 4372 | err = ext4_handle_dirty_super(handle, sb); |
4366 | } | 4373 | } |
4367 | ext4_update_inode_fsync_trans(handle, inode, need_datasync); | 4374 | ext4_update_inode_fsync_trans(handle, inode, need_datasync); |
4368 | out_brelse: | 4375 | out_brelse: |
4369 | brelse(bh); | 4376 | brelse(bh); |
4370 | ext4_std_error(inode->i_sb, err); | 4377 | ext4_std_error(inode->i_sb, err); |
4371 | return err; | 4378 | return err; |
4372 | } | 4379 | } |
4373 | 4380 | ||
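The uid/gid handling above exists because the on-disk inode keeps 32-bit ids as two 16-bit halves (i_uid_low/i_uid_high) for compatibility with the old ext2 layout; low_16_bits() and high_16_bits() are just a mask and a shift. A quick illustration with an arbitrary example uid:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t uid = 100000;                   /* example uid */
            uint16_t lo  = (uint16_t)(uid & 0xFFFF); /* 34464 */
            uint16_t hi  = (uint16_t)(uid >> 16);    /* 1 */

            printf("low=%u high=%u recombined=%u\n",
                   (unsigned)lo, (unsigned)hi,
                   ((uint32_t)hi << 16) | lo);       /* 100000 again */
            return 0;
    }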
4374 | /* | 4381 | /* |
4375 | * ext4_write_inode() | 4382 | * ext4_write_inode() |
4376 | * | 4383 | * |
4377 | * We are called from a few places: | 4384 | * We are called from a few places: |
4378 | * | 4385 | * |
4379 | * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. | 4386 | * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. |
4380 | * Here, there will be no transaction running. We wait for any running | 4387 | * Here, there will be no transaction running. We wait for any running |
4381 | * transaction to commit. | 4388 | * transaction to commit. |
4382 | * | 4389 | * |
4383 | * - Within flush work (sys_sync(), kupdate and such). | 4390 | * - Within flush work (sys_sync(), kupdate and such). |
4384 | * We wait on commit, if told to. | 4391 | * We wait on commit, if told to. |
4385 | * | 4392 | * |
4386 | * - Within iput_final() -> write_inode_now() | 4393 | * - Within iput_final() -> write_inode_now() |
4387 | * We wait on commit, if told to. | 4394 | * We wait on commit, if told to. |
4388 | * | 4395 | * |
4389 | * In all cases it is actually safe for us to return without doing anything, | 4396 | * In all cases it is actually safe for us to return without doing anything, |
4390 | * because the inode has been copied into a raw inode buffer in | 4397 | * because the inode has been copied into a raw inode buffer in |
4391 | * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL | 4398 | * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL |
4392 | * writeback. | 4399 | * writeback. |
4393 | * | 4400 | * |
4394 | * Note that we are absolutely dependent upon all inode dirtiers doing the | 4401 | * Note that we are absolutely dependent upon all inode dirtiers doing the |
4395 | * right thing: they *must* call mark_inode_dirty() after dirtying info in | 4402 | * right thing: they *must* call mark_inode_dirty() after dirtying info in |
4396 | * which we are interested. | 4403 | * which we are interested. |
4397 | * | 4404 | * |
4398 | * It would be a bug for them to not do this. The code: | 4405 | * It would be a bug for them to not do this. The code: |
4399 | * | 4406 | * |
4400 | * mark_inode_dirty(inode) | 4407 | * mark_inode_dirty(inode) |
4401 | * stuff(); | 4408 | * stuff(); |
4402 | * inode->i_size = expr; | 4409 | * inode->i_size = expr; |
4403 | * | 4410 | * |
4404 | * is in error because write_inode() could occur while `stuff()' is running, | 4411 | * is in error because write_inode() could occur while `stuff()' is running, |
4405 | * and the new i_size will be lost. Plus the inode will no longer be on the | 4412 | * and the new i_size will be lost. Plus the inode will no longer be on the |
4406 | * superblock's dirty inode list. | 4413 | * superblock's dirty inode list. |
4407 | */ | 4414 | */ |
4408 | int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | 4415 | int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) |
4409 | { | 4416 | { |
4410 | int err; | 4417 | int err; |
4411 | 4418 | ||
4412 | if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) | 4419 | if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) |
4413 | return 0; | 4420 | return 0; |
4414 | 4421 | ||
4415 | if (EXT4_SB(inode->i_sb)->s_journal) { | 4422 | if (EXT4_SB(inode->i_sb)->s_journal) { |
4416 | if (ext4_journal_current_handle()) { | 4423 | if (ext4_journal_current_handle()) { |
4417 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 4424 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
4418 | dump_stack(); | 4425 | dump_stack(); |
4419 | return -EIO; | 4426 | return -EIO; |
4420 | } | 4427 | } |
4421 | 4428 | ||
4422 | /* | 4429 | /* |
4423 | * No need to force transaction in WB_SYNC_NONE mode. Also | 4430 | * No need to force transaction in WB_SYNC_NONE mode. Also |
4424 | * ext4_sync_fs() will force the commit after everything is | 4431 | * ext4_sync_fs() will force the commit after everything is |
4425 | * written. | 4432 | * written. |
4426 | */ | 4433 | */ |
4427 | if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) | 4434 | if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) |
4428 | return 0; | 4435 | return 0; |
4429 | 4436 | ||
4430 | err = ext4_force_commit(inode->i_sb); | 4437 | err = ext4_force_commit(inode->i_sb); |
4431 | } else { | 4438 | } else { |
4432 | struct ext4_iloc iloc; | 4439 | struct ext4_iloc iloc; |
4433 | 4440 | ||
4434 | err = __ext4_get_inode_loc(inode, &iloc, 0); | 4441 | err = __ext4_get_inode_loc(inode, &iloc, 0); |
4435 | if (err) | 4442 | if (err) |
4436 | return err; | 4443 | return err; |
4437 | /* | 4444 | /* |
4438 | * sync(2) will flush the whole buffer cache. No need to do | 4445 | * sync(2) will flush the whole buffer cache. No need to do |
4439 | * it here separately for each inode. | 4446 | * it here separately for each inode. |
4440 | */ | 4447 | */ |
4441 | if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) | 4448 | if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) |
4442 | sync_dirty_buffer(iloc.bh); | 4449 | sync_dirty_buffer(iloc.bh); |
4443 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 4450 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
4444 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, | 4451 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
4445 | "IO error syncing inode"); | 4452 | "IO error syncing inode"); |
4446 | err = -EIO; | 4453 | err = -EIO; |
4447 | } | 4454 | } |
4448 | brelse(iloc.bh); | 4455 | brelse(iloc.bh); |
4449 | } | 4456 | } |
4450 | return err; | 4457 | return err; |
4451 | } | 4458 | } |
4452 | 4459 | ||
4453 | /* | 4460 | /* |
4454 | * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate | 4461 | * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate |
4455 | * buffers that are attached to a page straddling i_size and are undergoing | 4462 | * buffers that are attached to a page straddling i_size and are undergoing |
4456 | * commit. In that case we have to wait for the commit to finish and try again. | 4463 | * commit. In that case we have to wait for the commit to finish and try again. |
4457 | */ | 4464 | */ |
4458 | static void ext4_wait_for_tail_page_commit(struct inode *inode) | 4465 | static void ext4_wait_for_tail_page_commit(struct inode *inode) |
4459 | { | 4466 | { |
4460 | struct page *page; | 4467 | struct page *page; |
4461 | unsigned offset; | 4468 | unsigned offset; |
4462 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 4469 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
4463 | tid_t commit_tid = 0; | 4470 | tid_t commit_tid = 0; |
4464 | int ret; | 4471 | int ret; |
4465 | 4472 | ||
4466 | offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | 4473 | offset = inode->i_size & (PAGE_CACHE_SIZE - 1); |
4467 | /* | 4474 | /* |
4468 | * All buffers in the last page remain valid? Then there's nothing to | 4475 | * All buffers in the last page remain valid? Then there's nothing to |
4469 | * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == | 4476 | * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == |
4470 | * blocksize case | 4477 | * blocksize case |
4471 | */ | 4478 | */ |
4472 | if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) | 4479 | if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) |
4473 | return; | 4480 | return; |
4474 | while (1) { | 4481 | while (1) { |
4475 | page = find_lock_page(inode->i_mapping, | 4482 | page = find_lock_page(inode->i_mapping, |
4476 | inode->i_size >> PAGE_CACHE_SHIFT); | 4483 | inode->i_size >> PAGE_CACHE_SHIFT); |
4477 | if (!page) | 4484 | if (!page) |
4478 | return; | 4485 | return; |
4479 | ret = __ext4_journalled_invalidatepage(page, offset, | 4486 | ret = __ext4_journalled_invalidatepage(page, offset, |
4480 | PAGE_CACHE_SIZE - offset); | 4487 | PAGE_CACHE_SIZE - offset); |
4481 | unlock_page(page); | 4488 | unlock_page(page); |
4482 | page_cache_release(page); | 4489 | page_cache_release(page); |
4483 | if (ret != -EBUSY) | 4490 | if (ret != -EBUSY) |
4484 | return; | 4491 | return; |
4485 | commit_tid = 0; | 4492 | commit_tid = 0; |
4486 | read_lock(&journal->j_state_lock); | 4493 | read_lock(&journal->j_state_lock); |
4487 | if (journal->j_committing_transaction) | 4494 | if (journal->j_committing_transaction) |
4488 | commit_tid = journal->j_committing_transaction->t_tid; | 4495 | commit_tid = journal->j_committing_transaction->t_tid; |
4489 | read_unlock(&journal->j_state_lock); | 4496 | read_unlock(&journal->j_state_lock); |
4490 | if (commit_tid) | 4497 | if (commit_tid) |
4491 | jbd2_log_wait_commit(journal, commit_tid); | 4498 | jbd2_log_wait_commit(journal, commit_tid); |
4492 | } | 4499 | } |
4493 | } | 4500 | } |
4494 | 4501 | ||
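A worked trace of the tail-page check above, with illustrative values and assuming PAGE_CACHE_SIZE == 4096:

	/*
	 * i_size = 10000, blocksize = 4096 (i_blkbits = 12):
	 *   offset = 10000 & 4095 = 1808
	 *   1808 > 4096 - 4096 = 0, so we return at once; the single
	 *   buffer in the tail page straddles i_size and stays valid.
	 *
	 * i_size = 10000, blocksize = 1024 (i_blkbits = 10):
	 *   offset = 1808, and 1808 > 4096 - 1024 = 3072 is false, so
	 *   the buffers from byte 1808 onward are invalidated, retrying
	 *   for as long as a running commit keeps them busy (-EBUSY).
	 */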
4495 | /* | 4502 | /* |
4496 | * ext4_setattr() | 4503 | * ext4_setattr() |
4497 | * | 4504 | * |
4498 | * Called from notify_change. | 4505 | * Called from notify_change. |
4499 | * | 4506 | * |
4500 | * We want to trap VFS attempts to truncate the file as soon as | 4507 | * We want to trap VFS attempts to truncate the file as soon as |
4501 | * possible. In particular, we want to make sure that when the VFS | 4508 | * possible. In particular, we want to make sure that when the VFS |
4502 | * shrinks i_size, we put the inode on the orphan list and modify | 4509 | * shrinks i_size, we put the inode on the orphan list and modify |
4503 | * i_disksize immediately, so that during the subsequent flushing of | 4510 | * i_disksize immediately, so that during the subsequent flushing of |
4504 | * dirty pages and freeing of disk blocks, we can guarantee that any | 4511 | * dirty pages and freeing of disk blocks, we can guarantee that any |
4505 | * commit will leave the blocks being flushed in an unused state on | 4512 | * commit will leave the blocks being flushed in an unused state on |
4506 | * disk. (On recovery, the inode will get truncated and the blocks will | 4513 | * disk. (On recovery, the inode will get truncated and the blocks will |
4507 | * be freed, so we have a strong guarantee that no future commit will | 4514 | * be freed, so we have a strong guarantee that no future commit will |
4508 | * leave these blocks visible to the user.) | 4515 | * leave these blocks visible to the user.) |
4509 | * | 4516 | * |
4510 | * Another thing we have to assure is that if we are in ordered mode | 4517 | * Another thing we have to assure is that if we are in ordered mode |
4511 | * and the inode is still attached to the committing transaction, we must | 4518 | * and the inode is still attached to the committing transaction, we must |
4512 | * start writeout of all the dirty pages which are being truncated. | 4519 | * start writeout of all the dirty pages which are being truncated. |
4513 | * This way we are sure that all the data written in the previous | 4520 | * This way we are sure that all the data written in the previous |
4514 | * transaction are already on disk (truncate waits for pages under | 4521 | * transaction are already on disk (truncate waits for pages under |
4515 | * writeback). | 4522 | * writeback). |
4516 | * | 4523 | * |
4517 | * Called with inode->i_mutex down. | 4524 | * Called with inode->i_mutex down. |
4518 | */ | 4525 | */ |
4519 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) | 4526 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) |
4520 | { | 4527 | { |
4521 | struct inode *inode = dentry->d_inode; | 4528 | struct inode *inode = dentry->d_inode; |
4522 | int error, rc = 0; | 4529 | int error, rc = 0; |
4523 | int orphan = 0; | 4530 | int orphan = 0; |
4524 | const unsigned int ia_valid = attr->ia_valid; | 4531 | const unsigned int ia_valid = attr->ia_valid; |
4525 | 4532 | ||
4526 | error = inode_change_ok(inode, attr); | 4533 | error = inode_change_ok(inode, attr); |
4527 | if (error) | 4534 | if (error) |
4528 | return error; | 4535 | return error; |
4529 | 4536 | ||
4530 | if (is_quota_modification(inode, attr)) | 4537 | if (is_quota_modification(inode, attr)) |
4531 | dquot_initialize(inode); | 4538 | dquot_initialize(inode); |
4532 | if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || | 4539 | if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || |
4533 | (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { | 4540 | (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { |
4534 | handle_t *handle; | 4541 | handle_t *handle; |
4535 | 4542 | ||
4536 | /* (user+group)*(old+new) structure, inode write (sb, | 4543 | /* (user+group)*(old+new) structure, inode write (sb, |
4537 | * inode block, ? - but truncate inode update has it) */ | 4544 | * inode block, ? - but truncate inode update has it) */ |
4538 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, | 4545 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, |
4539 | (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + | 4546 | (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + |
4540 | EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3); | 4547 | EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3); |
4541 | if (IS_ERR(handle)) { | 4548 | if (IS_ERR(handle)) { |
4542 | error = PTR_ERR(handle); | 4549 | error = PTR_ERR(handle); |
4543 | goto err_out; | 4550 | goto err_out; |
4544 | } | 4551 | } |
4545 | error = dquot_transfer(inode, attr); | 4552 | error = dquot_transfer(inode, attr); |
4546 | if (error) { | 4553 | if (error) { |
4547 | ext4_journal_stop(handle); | 4554 | ext4_journal_stop(handle); |
4548 | return error; | 4555 | return error; |
4549 | } | 4556 | } |
4550 | /* Update corresponding info in inode so that everything is in | 4557 | /* Update corresponding info in inode so that everything is in |
4551 | * one transaction */ | 4558 | * one transaction */ |
4552 | if (attr->ia_valid & ATTR_UID) | 4559 | if (attr->ia_valid & ATTR_UID) |
4553 | inode->i_uid = attr->ia_uid; | 4560 | inode->i_uid = attr->ia_uid; |
4554 | if (attr->ia_valid & ATTR_GID) | 4561 | if (attr->ia_valid & ATTR_GID) |
4555 | inode->i_gid = attr->ia_gid; | 4562 | inode->i_gid = attr->ia_gid; |
4556 | error = ext4_mark_inode_dirty(handle, inode); | 4563 | error = ext4_mark_inode_dirty(handle, inode); |
4557 | ext4_journal_stop(handle); | 4564 | ext4_journal_stop(handle); |
4558 | } | 4565 | } |
4559 | 4566 | ||
4560 | if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { | 4567 | if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { |
4561 | handle_t *handle; | 4568 | handle_t *handle; |
4562 | 4569 | ||
4563 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 4570 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
4564 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4571 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
4565 | 4572 | ||
4566 | if (attr->ia_size > sbi->s_bitmap_maxbytes) | 4573 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
4567 | return -EFBIG; | 4574 | return -EFBIG; |
4568 | } | 4575 | } |
4569 | 4576 | ||
4570 | if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) | 4577 | if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) |
4571 | inode_inc_iversion(inode); | 4578 | inode_inc_iversion(inode); |
4572 | 4579 | ||
4573 | if (S_ISREG(inode->i_mode) && | 4580 | if (S_ISREG(inode->i_mode) && |
4574 | (attr->ia_size < inode->i_size)) { | 4581 | (attr->ia_size < inode->i_size)) { |
4575 | if (ext4_should_order_data(inode)) { | 4582 | if (ext4_should_order_data(inode)) { |
4576 | error = ext4_begin_ordered_truncate(inode, | 4583 | error = ext4_begin_ordered_truncate(inode, |
4577 | attr->ia_size); | 4584 | attr->ia_size); |
4578 | if (error) | 4585 | if (error) |
4579 | goto err_out; | 4586 | goto err_out; |
4580 | } | 4587 | } |
4581 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); | 4588 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); |
4582 | if (IS_ERR(handle)) { | 4589 | if (IS_ERR(handle)) { |
4583 | error = PTR_ERR(handle); | 4590 | error = PTR_ERR(handle); |
4584 | goto err_out; | 4591 | goto err_out; |
4585 | } | 4592 | } |
4586 | if (ext4_handle_valid(handle)) { | 4593 | if (ext4_handle_valid(handle)) { |
4587 | error = ext4_orphan_add(handle, inode); | 4594 | error = ext4_orphan_add(handle, inode); |
4588 | orphan = 1; | 4595 | orphan = 1; |
4589 | } | 4596 | } |
4590 | down_write(&EXT4_I(inode)->i_data_sem); | 4597 | down_write(&EXT4_I(inode)->i_data_sem); |
4591 | EXT4_I(inode)->i_disksize = attr->ia_size; | 4598 | EXT4_I(inode)->i_disksize = attr->ia_size; |
4592 | rc = ext4_mark_inode_dirty(handle, inode); | 4599 | rc = ext4_mark_inode_dirty(handle, inode); |
4593 | if (!error) | 4600 | if (!error) |
4594 | error = rc; | 4601 | error = rc; |
4595 | /* | 4602 | /* |
4596 | * We have to update i_size under i_data_sem together | 4603 | * We have to update i_size under i_data_sem together |
4597 | * with i_disksize to avoid races with writeback code | 4604 | * with i_disksize to avoid races with writeback code |
4598 | * running ext4_wb_update_i_disksize(). | 4605 | * running ext4_wb_update_i_disksize(). |
4599 | */ | 4606 | */ |
4600 | if (!error) | 4607 | if (!error) |
4601 | i_size_write(inode, attr->ia_size); | 4608 | i_size_write(inode, attr->ia_size); |
4602 | up_write(&EXT4_I(inode)->i_data_sem); | 4609 | up_write(&EXT4_I(inode)->i_data_sem); |
4603 | ext4_journal_stop(handle); | 4610 | ext4_journal_stop(handle); |
4604 | if (error) { | 4611 | if (error) { |
4605 | ext4_orphan_del(NULL, inode); | 4612 | ext4_orphan_del(NULL, inode); |
4606 | goto err_out; | 4613 | goto err_out; |
4607 | } | 4614 | } |
4608 | } else { | 4615 | } else { |
4609 | loff_t oldsize = inode->i_size; | 4616 | loff_t oldsize = inode->i_size; |
4610 | 4617 | ||
4611 | i_size_write(inode, attr->ia_size); | 4618 | i_size_write(inode, attr->ia_size); |
4612 | pagecache_isize_extended(inode, oldsize, inode->i_size); | 4619 | pagecache_isize_extended(inode, oldsize, inode->i_size); |
4613 | } | 4620 | } |
4614 | 4621 | ||
4615 | /* | 4622 | /* |
4616 | * Blocks are going to be removed from the inode. Wait | 4623 | * Blocks are going to be removed from the inode. Wait |
4617 | * for dio in flight. Temporarily disable | 4624 | * for dio in flight. Temporarily disable |
4618 | * dioread_nolock to prevent livelock. | 4625 | * dioread_nolock to prevent livelock. |
4619 | */ | 4626 | */ |
4620 | if (orphan) { | 4627 | if (orphan) { |
4621 | if (!ext4_should_journal_data(inode)) { | 4628 | if (!ext4_should_journal_data(inode)) { |
4622 | ext4_inode_block_unlocked_dio(inode); | 4629 | ext4_inode_block_unlocked_dio(inode); |
4623 | inode_dio_wait(inode); | 4630 | inode_dio_wait(inode); |
4624 | ext4_inode_resume_unlocked_dio(inode); | 4631 | ext4_inode_resume_unlocked_dio(inode); |
4625 | } else | 4632 | } else |
4626 | ext4_wait_for_tail_page_commit(inode); | 4633 | ext4_wait_for_tail_page_commit(inode); |
4627 | } | 4634 | } |
4628 | /* | 4635 | /* |
4629 | * Truncate pagecache after we've waited for commit | 4636 | * Truncate pagecache after we've waited for commit |
4630 | * in data=journal mode to make pages freeable. | 4637 | * in data=journal mode to make pages freeable. |
4631 | */ | 4638 | */ |
4632 | truncate_pagecache(inode, inode->i_size); | 4639 | truncate_pagecache(inode, inode->i_size); |
4633 | } | 4640 | } |
4634 | /* | 4641 | /* |
4635 | * We want to call ext4_truncate() even if attr->ia_size == | 4642 | * We want to call ext4_truncate() even if attr->ia_size == |
4636 | * inode->i_size for cases like truncation of fallocated space | 4643 | * inode->i_size for cases like truncation of fallocated space |
4637 | */ | 4644 | */ |
4638 | if (attr->ia_valid & ATTR_SIZE) | 4645 | if (attr->ia_valid & ATTR_SIZE) |
4639 | ext4_truncate(inode); | 4646 | ext4_truncate(inode); |
4640 | 4647 | ||
4641 | if (!rc) { | 4648 | if (!rc) { |
4642 | setattr_copy(inode, attr); | 4649 | setattr_copy(inode, attr); |
4643 | mark_inode_dirty(inode); | 4650 | mark_inode_dirty(inode); |
4644 | } | 4651 | } |
4645 | 4652 | ||
4646 | /* | 4653 | /* |
4647 | * If the call to ext4_truncate failed to get a transaction handle at | 4654 | * If the call to ext4_truncate failed to get a transaction handle at |
4648 | * all, we need to clean up the in-core orphan list manually. | 4655 | * all, we need to clean up the in-core orphan list manually. |
4649 | */ | 4656 | */ |
4650 | if (orphan && inode->i_nlink) | 4657 | if (orphan && inode->i_nlink) |
4651 | ext4_orphan_del(NULL, inode); | 4658 | ext4_orphan_del(NULL, inode); |
4652 | 4659 | ||
4653 | if (!rc && (ia_valid & ATTR_MODE)) | 4660 | if (!rc && (ia_valid & ATTR_MODE)) |
4654 | rc = posix_acl_chmod(inode, inode->i_mode); | 4661 | rc = posix_acl_chmod(inode, inode->i_mode); |
4655 | 4662 | ||
4656 | err_out: | 4663 | err_out: |
4657 | ext4_std_error(inode->i_sb, error); | 4664 | ext4_std_error(inode->i_sb, error); |
4658 | if (!error) | 4665 | if (!error) |
4659 | error = rc; | 4666 | error = rc; |
4660 | return error; | 4667 | return error; |
4661 | } | 4668 | } |
4662 | 4669 | ||
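To make the entry point concrete: a shrinking ftruncate(2) reaches ext4_setattr() via notify_change() with ATTR_SIZE set. A hypothetical userspace sketch (shrink() is illustrative, not part of ext4):

	#include <fcntl.h>
	#include <unistd.h>

	int shrink(const char *path, off_t newsize)
	{
		int fd = open(path, O_WRONLY);
		int ret;

		if (fd < 0)
			return -1;
		/* VFS: do_truncate() -> notify_change() -> ext4_setattr();
		 * the inode goes on the orphan list before blocks are freed. */
		ret = ftruncate(fd, newsize);
		close(fd);
		return ret;
	}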
4663 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 4670 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
4664 | struct kstat *stat) | 4671 | struct kstat *stat) |
4665 | { | 4672 | { |
4666 | struct inode *inode; | 4673 | struct inode *inode; |
4667 | unsigned long long delalloc_blocks; | 4674 | unsigned long long delalloc_blocks; |
4668 | 4675 | ||
4669 | inode = dentry->d_inode; | 4676 | inode = dentry->d_inode; |
4670 | generic_fillattr(inode, stat); | 4677 | generic_fillattr(inode, stat); |
4671 | 4678 | ||
4672 | /* | 4679 | /* |
4673 | * If there is inline data in the inode, the inode will normally not | 4680 | * If there is inline data in the inode, the inode will normally not |
4674 | * have data blocks allocated (it may have an external xattr block). | 4681 | * have data blocks allocated (it may have an external xattr block). |
4675 | * Report at least one sector for such files, so tools like tar, rsync, | 4682 | * Report at least one sector for such files, so tools like tar, rsync, |
4676 | * and others don't incorrectly think the file is completely sparse. | 4683 | * and others don't incorrectly think the file is completely sparse. |
4677 | */ | 4684 | */ |
4678 | if (unlikely(ext4_has_inline_data(inode))) | 4685 | if (unlikely(ext4_has_inline_data(inode))) |
4679 | stat->blocks += (stat->size + 511) >> 9; | 4686 | stat->blocks += (stat->size + 511) >> 9; |
4680 | 4687 | ||
4681 | /* | 4688 | /* |
4682 | * We can't update i_blocks if the block allocation is delayed | 4689 | * We can't update i_blocks if the block allocation is delayed |
4683 | * otherwise, in the case of a system crash before the real block | 4690 | * otherwise, in the case of a system crash before the real block |
4684 | * allocation is done, we will have i_blocks inconsistent with | 4691 | * allocation is done, we will have i_blocks inconsistent with |
4685 | * on-disk file blocks. | 4692 | * on-disk file blocks. |
4686 | * We always keep i_blocks updated together with real | 4693 | * We always keep i_blocks updated together with real |
4687 | * allocation. But to avoid confusing users, stat | 4694 | * allocation. But to avoid confusing users, stat |
4688 | * will return the blocks that include the delayed allocation | 4695 | * will return the blocks that include the delayed allocation |
4689 | * blocks for this file. | 4696 | * blocks for this file. |
4690 | */ | 4697 | */ |
4691 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), | 4698 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), |
4692 | EXT4_I(inode)->i_reserved_data_blocks); | 4699 | EXT4_I(inode)->i_reserved_data_blocks); |
4693 | stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); | 4700 | stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); |
4694 | return 0; | 4701 | return 0; |
4695 | } | 4702 | } |
4696 | 4703 | ||
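A worked example of the two adjustments above, assuming 4k blocks (s_blocksize_bits = 12):

	/*
	 * A 60-byte inline-data file reports (60 + 511) >> 9 = 1 sector
	 * even though no data blocks are allocated. A file with 3
	 * reserved delalloc blocks additionally reports
	 * 3 << (12 - 9) = 24 extra 512-byte sectors in stat->blocks.
	 */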
4697 | static int ext4_index_trans_blocks(struct inode *inode, int lblocks, | 4704 | static int ext4_index_trans_blocks(struct inode *inode, int lblocks, |
4698 | int pextents) | 4705 | int pextents) |
4699 | { | 4706 | { |
4700 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4707 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4701 | return ext4_ind_trans_blocks(inode, lblocks); | 4708 | return ext4_ind_trans_blocks(inode, lblocks); |
4702 | return ext4_ext_index_trans_blocks(inode, pextents); | 4709 | return ext4_ext_index_trans_blocks(inode, pextents); |
4703 | } | 4710 | } |
4704 | 4711 | ||
4705 | /* | 4712 | /* |
4706 | * Account for index blocks, block group bitmaps and block group | 4713 | * Account for index blocks, block group bitmaps and block group |
4707 | * descriptor blocks if we modify data blocks and index blocks. In the | 4714 | * descriptor blocks if we modify data blocks and index blocks. In the |
4708 | * worst case, the index blocks spread over different block groups. | 4715 | * worst case, the index blocks spread over different block groups. |
4709 | * | 4716 | * |
4710 | * If data blocks are discontiguous, they may spread over | 4717 | * If data blocks are discontiguous, they may spread over |
4711 | * different block groups too. If they are contiguous, with flexbg | 4718 | * different block groups too. If they are contiguous, with flexbg |
4712 | * they could still cross a block group boundary. | 4719 | * they could still cross a block group boundary. |
4713 | * | 4720 | * |
4714 | * Also account for superblock, inode, quota and xattr blocks | 4721 | * Also account for superblock, inode, quota and xattr blocks |
4715 | */ | 4722 | */ |
4716 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, | 4723 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
4717 | int pextents) | 4724 | int pextents) |
4718 | { | 4725 | { |
4719 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 4726 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
4720 | int gdpblocks; | 4727 | int gdpblocks; |
4721 | int idxblocks; | 4728 | int idxblocks; |
4722 | int ret = 0; | 4729 | int ret = 0; |
4723 | 4730 | ||
4724 | /* | 4731 | /* |
4725 | * How many index blocks do we need to touch to map @lblocks logical blocks | 4732 | * How many index blocks do we need to touch to map @lblocks logical blocks |
4726 | * to @pextents physical extents? | 4733 | * to @pextents physical extents? |
4727 | */ | 4734 | */ |
4728 | idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); | 4735 | idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); |
4729 | 4736 | ||
4730 | ret = idxblocks; | 4737 | ret = idxblocks; |
4731 | 4738 | ||
4732 | /* | 4739 | /* |
4733 | * Now let's see how many group bitmaps and group descriptors need | 4740 | * Now let's see how many group bitmaps and group descriptors need |
4734 | * to be accounted for | 4741 | * to be accounted for |
4735 | */ | 4742 | */ |
4736 | groups = idxblocks + pextents; | 4743 | groups = idxblocks + pextents; |
4737 | gdpblocks = groups; | 4744 | gdpblocks = groups; |
4738 | if (groups > ngroups) | 4745 | if (groups > ngroups) |
4739 | groups = ngroups; | 4746 | groups = ngroups; |
4740 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | 4747 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) |
4741 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | 4748 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; |
4742 | 4749 | ||
4743 | /* bitmaps and block group descriptor blocks */ | 4750 | /* bitmaps and block group descriptor blocks */ |
4744 | ret += groups + gdpblocks; | 4751 | ret += groups + gdpblocks; |
4745 | 4752 | ||
4746 | /* Blocks for super block, inode, quota and xattr blocks */ | 4753 | /* Blocks for super block, inode, quota and xattr blocks */ |
4747 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | 4754 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); |
4748 | 4755 | ||
4749 | return ret; | 4756 | return ret; |
4750 | } | 4757 | } |
4751 | 4758 | ||
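A worked trace of the accounting above, with illustrative inputs idxblocks = 3, pextents = 1, ngroups = 16 and s_gdb_count = 1:

	/*
	 * groups    = 3 + 1 = 4      (<= ngroups, so left unchanged)
	 * gdpblocks = min(4, s_gdb_count) = 1
	 * ret       = 3 + (4 + 1) + EXT4_META_TRANS_BLOCKS(sb)
	 *
	 * i.e. the index blocks, one bitmap per touched group, the
	 * (capped) descriptor blocks, and the fixed superblock/inode/
	 * quota/xattr overhead.
	 */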
4752 | /* | 4759 | /* |
4753 | * Calculate the total number of credits to reserve to fit | 4760 | * Calculate the total number of credits to reserve to fit |
4754 | * the modification of a single page into a single transaction, | 4761 | * the modification of a single page into a single transaction, |
4755 | * which may include multiple chunks of block allocations. | 4762 | * which may include multiple chunks of block allocations. |
4756 | * | 4763 | * |
4757 | * This could be called via ext4_write_begin() | 4764 | * This could be called via ext4_write_begin() |
4758 | * | 4765 | * |
4759 | * We need to consider the worst case, when we allocate | 4766 | * We need to consider the worst case, when we allocate |
4760 | * one new block per extent. | 4767 | * one new block per extent. |
4761 | */ | 4768 | */ |
4762 | int ext4_writepage_trans_blocks(struct inode *inode) | 4769 | int ext4_writepage_trans_blocks(struct inode *inode) |
4763 | { | 4770 | { |
4764 | int bpp = ext4_journal_blocks_per_page(inode); | 4771 | int bpp = ext4_journal_blocks_per_page(inode); |
4765 | int ret; | 4772 | int ret; |
4766 | 4773 | ||
4767 | ret = ext4_meta_trans_blocks(inode, bpp, bpp); | 4774 | ret = ext4_meta_trans_blocks(inode, bpp, bpp); |
4768 | 4775 | ||
4769 | /* Account for data blocks for journalled mode */ | 4776 | /* Account for data blocks for journalled mode */ |
4770 | if (ext4_should_journal_data(inode)) | 4777 | if (ext4_should_journal_data(inode)) |
4771 | ret += bpp; | 4778 | ret += bpp; |
4772 | return ret; | 4779 | return ret; |
4773 | } | 4780 | } |
4774 | 4781 | ||
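For example (illustrative numbers): with 4k pages and 1k blocks, ext4_journal_blocks_per_page() yields bpp = 4, so the reservation covers the worst case of four separately allocated extents for one page, plus four extra data-block credits in data=journal mode.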
4775 | /* | 4782 | /* |
4776 | * Calculate the journal credits for a chunk of data modification. | 4783 | * Calculate the journal credits for a chunk of data modification. |
4777 | * | 4784 | * |
4778 | * This is called from DIO, fallocate or whoever else calls | 4785 | * This is called from DIO, fallocate or whoever else calls |
4779 | * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. | 4786 | * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. |
4780 | * | 4787 | * |
4781 | * journal buffers for data blocks are not included here, as DIO | 4788 | * journal buffers for data blocks are not included here, as DIO |
4782 | * and fallocate do not need to journal data buffers. | 4789 | * and fallocate do not need to journal data buffers. |
4783 | */ | 4790 | */ |
4784 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | 4791 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) |
4785 | { | 4792 | { |
4786 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | 4793 | return ext4_meta_trans_blocks(inode, nrblocks, 1); |
4787 | } | 4794 | } |
4788 | 4795 | ||
4789 | /* | 4796 | /* |
4790 | * The caller must have previously called ext4_reserve_inode_write(). | 4797 | * The caller must have previously called ext4_reserve_inode_write(). |
4791 | * Given this, we know that the caller already has write access to iloc->bh. | 4798 | * Given this, we know that the caller already has write access to iloc->bh. |
4792 | */ | 4799 | */ |
4793 | int ext4_mark_iloc_dirty(handle_t *handle, | 4800 | int ext4_mark_iloc_dirty(handle_t *handle, |
4794 | struct inode *inode, struct ext4_iloc *iloc) | 4801 | struct inode *inode, struct ext4_iloc *iloc) |
4795 | { | 4802 | { |
4796 | int err = 0; | 4803 | int err = 0; |
4797 | 4804 | ||
4798 | if (IS_I_VERSION(inode)) | 4805 | if (IS_I_VERSION(inode)) |
4799 | inode_inc_iversion(inode); | 4806 | inode_inc_iversion(inode); |
4800 | 4807 | ||
4801 | /* the do_update_inode consumes one bh->b_count */ | 4808 | /* the do_update_inode consumes one bh->b_count */ |
4802 | get_bh(iloc->bh); | 4809 | get_bh(iloc->bh); |
4803 | 4810 | ||
4804 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ | 4811 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ |
4805 | err = ext4_do_update_inode(handle, inode, iloc); | 4812 | err = ext4_do_update_inode(handle, inode, iloc); |
4806 | put_bh(iloc->bh); | 4813 | put_bh(iloc->bh); |
4807 | return err; | 4814 | return err; |
4808 | } | 4815 | } |
4809 | 4816 | ||
4810 | /* | 4817 | /* |
4811 | * On success, we end up with an outstanding reference count against | 4818 | * On success, we end up with an outstanding reference count against |
4812 | * iloc->bh. This _must_ be cleaned up later. | 4819 | * iloc->bh. This _must_ be cleaned up later. |
4813 | */ | 4820 | */ |
4814 | 4821 | ||
4815 | int | 4822 | int |
4816 | ext4_reserve_inode_write(handle_t *handle, struct inode *inode, | 4823 | ext4_reserve_inode_write(handle_t *handle, struct inode *inode, |
4817 | struct ext4_iloc *iloc) | 4824 | struct ext4_iloc *iloc) |
4818 | { | 4825 | { |
4819 | int err; | 4826 | int err; |
4820 | 4827 | ||
4821 | err = ext4_get_inode_loc(inode, iloc); | 4828 | err = ext4_get_inode_loc(inode, iloc); |
4822 | if (!err) { | 4829 | if (!err) { |
4823 | BUFFER_TRACE(iloc->bh, "get_write_access"); | 4830 | BUFFER_TRACE(iloc->bh, "get_write_access"); |
4824 | err = ext4_journal_get_write_access(handle, iloc->bh); | 4831 | err = ext4_journal_get_write_access(handle, iloc->bh); |
4825 | if (err) { | 4832 | if (err) { |
4826 | brelse(iloc->bh); | 4833 | brelse(iloc->bh); |
4827 | iloc->bh = NULL; | 4834 | iloc->bh = NULL; |
4828 | } | 4835 | } |
4829 | } | 4836 | } |
4830 | ext4_std_error(inode->i_sb, err); | 4837 | ext4_std_error(inode->i_sb, err); |
4831 | return err; | 4838 | return err; |
4832 | } | 4839 | } |
4833 | 4840 | ||
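The intended calling pattern for the pair above, sketched with error handling trimmed (example_touch_inode() is a hypothetical helper mirroring how ext4_mark_inode_dirty() uses them):

	static int example_touch_inode(handle_t *handle, struct inode *inode)
	{
		struct ext4_iloc iloc;
		int err;

		err = ext4_reserve_inode_write(handle, inode, &iloc);
		if (err)
			return err;
		/* ... modify in-core inode fields under the handle ... */
		return ext4_mark_iloc_dirty(handle, inode, &iloc); /* drops bh ref */
	}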
4834 | /* | 4841 | /* |
4835 | * Expand an inode by new_extra_isize bytes. | 4842 | * Expand an inode by new_extra_isize bytes. |
4836 | * Returns 0 on success or negative error number on failure. | 4843 | * Returns 0 on success or negative error number on failure. |
4837 | */ | 4844 | */ |
4838 | static int ext4_expand_extra_isize(struct inode *inode, | 4845 | static int ext4_expand_extra_isize(struct inode *inode, |
4839 | unsigned int new_extra_isize, | 4846 | unsigned int new_extra_isize, |
4840 | struct ext4_iloc iloc, | 4847 | struct ext4_iloc iloc, |
4841 | handle_t *handle) | 4848 | handle_t *handle) |
4842 | { | 4849 | { |
4843 | struct ext4_inode *raw_inode; | 4850 | struct ext4_inode *raw_inode; |
4844 | struct ext4_xattr_ibody_header *header; | 4851 | struct ext4_xattr_ibody_header *header; |
4845 | 4852 | ||
4846 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) | 4853 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
4847 | return 0; | 4854 | return 0; |
4848 | 4855 | ||
4849 | raw_inode = ext4_raw_inode(&iloc); | 4856 | raw_inode = ext4_raw_inode(&iloc); |
4850 | 4857 | ||
4851 | header = IHDR(inode, raw_inode); | 4858 | header = IHDR(inode, raw_inode); |
4852 | 4859 | ||
4853 | /* No extended attributes present */ | 4860 | /* No extended attributes present */ |
4854 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 4861 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
4855 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 4862 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { |
4856 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 4863 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, |
4857 | new_extra_isize); | 4864 | new_extra_isize); |
4858 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 4865 | EXT4_I(inode)->i_extra_isize = new_extra_isize; |
4859 | return 0; | 4866 | return 0; |
4860 | } | 4867 | } |
4861 | 4868 | ||
4862 | /* try to expand with EAs present */ | 4869 | /* try to expand with EAs present */ |
4863 | return ext4_expand_extra_isize_ea(inode, new_extra_isize, | 4870 | return ext4_expand_extra_isize_ea(inode, new_extra_isize, |
4864 | raw_inode, handle); | 4871 | raw_inode, handle); |
4865 | } | 4872 | } |
4866 | 4873 | ||
4867 | /* | 4874 | /* |
4868 | * What we do here is to mark the in-core inode as clean with respect to inode | 4875 | * What we do here is to mark the in-core inode as clean with respect to inode |
4869 | * dirtiness (it may still be data-dirty). | 4876 | * dirtiness (it may still be data-dirty). |
4870 | * This means that the in-core inode may be reaped by prune_icache | 4877 | * This means that the in-core inode may be reaped by prune_icache |
4871 | * without having to perform any I/O. This is a very good thing, | 4878 | * without having to perform any I/O. This is a very good thing, |
4872 | * because *any* task may call prune_icache - even ones which | 4879 | * because *any* task may call prune_icache - even ones which |
4873 | * have a transaction open against a different journal. | 4880 | * have a transaction open against a different journal. |
4874 | * | 4881 | * |
4875 | * Is this cheating? Not really. Sure, we haven't written the | 4882 | * Is this cheating? Not really. Sure, we haven't written the |
4876 | * inode out, but prune_icache isn't a user-visible syncing function. | 4883 | * inode out, but prune_icache isn't a user-visible syncing function. |
4877 | * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) | 4884 | * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) |
4878 | * we start and wait on commits. | 4885 | * we start and wait on commits. |
4879 | */ | 4886 | */ |
4880 | int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | 4887 | int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) |
4881 | { | 4888 | { |
4882 | struct ext4_iloc iloc; | 4889 | struct ext4_iloc iloc; |
4883 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4890 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
4884 | static unsigned int mnt_count; | 4891 | static unsigned int mnt_count; |
4885 | int err, ret; | 4892 | int err, ret; |
4886 | 4893 | ||
4887 | might_sleep(); | 4894 | might_sleep(); |
4888 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | 4895 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); |
4889 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 4896 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
4890 | if (ext4_handle_valid(handle) && | 4897 | if (ext4_handle_valid(handle) && |
4891 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 4898 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
4892 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { | 4899 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { |
4893 | /* | 4900 | /* |
4894 | * We need extra buffer credits since we may write into EA block | 4901 | * We need extra buffer credits since we may write into EA block |
4895 | * with this same handle. If journal_extend fails, then it will | 4902 | * with this same handle. If journal_extend fails, then it will |
4896 | * only result in a minor loss of functionality for that inode. | 4903 | * only result in a minor loss of functionality for that inode. |
4897 | * If this is felt to be critical, then e2fsck should be run to | 4904 | * If this is felt to be critical, then e2fsck should be run to |
4898 | * force a large enough s_min_extra_isize. | 4905 | * force a large enough s_min_extra_isize. |
4899 | */ | 4906 | */ |
4900 | if ((jbd2_journal_extend(handle, | 4907 | if ((jbd2_journal_extend(handle, |
4901 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { | 4908 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { |
4902 | ret = ext4_expand_extra_isize(inode, | 4909 | ret = ext4_expand_extra_isize(inode, |
4903 | sbi->s_want_extra_isize, | 4910 | sbi->s_want_extra_isize, |
4904 | iloc, handle); | 4911 | iloc, handle); |
4905 | if (ret) { | 4912 | if (ret) { |
4906 | ext4_set_inode_state(inode, | 4913 | ext4_set_inode_state(inode, |
4907 | EXT4_STATE_NO_EXPAND); | 4914 | EXT4_STATE_NO_EXPAND); |
4908 | if (mnt_count != | 4915 | if (mnt_count != |
4909 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 4916 | le16_to_cpu(sbi->s_es->s_mnt_count)) { |
4910 | ext4_warning(inode->i_sb, | 4917 | ext4_warning(inode->i_sb, |
4911 | "Unable to expand inode %lu. Delete" | 4918 | "Unable to expand inode %lu. Delete" |
4912 | " some EAs or run e2fsck.", | 4919 | " some EAs or run e2fsck.", |
4913 | inode->i_ino); | 4920 | inode->i_ino); |
4914 | mnt_count = | 4921 | mnt_count = |
4915 | le16_to_cpu(sbi->s_es->s_mnt_count); | 4922 | le16_to_cpu(sbi->s_es->s_mnt_count); |
4916 | } | 4923 | } |
4917 | } | 4924 | } |
4918 | } | 4925 | } |
4919 | } | 4926 | } |
4920 | if (!err) | 4927 | if (!err) |
4921 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | 4928 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); |
4922 | return err; | 4929 | return err; |
4923 | } | 4930 | } |
4924 | 4931 | ||
4925 | /* | 4932 | /* |
4926 | * ext4_dirty_inode() is called from __mark_inode_dirty() | 4933 | * ext4_dirty_inode() is called from __mark_inode_dirty() |
4927 | * | 4934 | * |
4928 | * We're really interested in the case where a file is being extended. | 4935 | * We're really interested in the case where a file is being extended. |
4929 | * i_size has been changed by generic_commit_write() and we thus need | 4936 | * i_size has been changed by generic_commit_write() and we thus need |
4930 | * to include the updated inode in the current transaction. | 4937 | * to include the updated inode in the current transaction. |
4931 | * | 4938 | * |
4932 | * Also, dquot_alloc_block() will always dirty the inode when blocks | 4939 | * Also, dquot_alloc_block() will always dirty the inode when blocks |
4933 | * are allocated to the file. | 4940 | * are allocated to the file. |
4934 | * | 4941 | * |
4935 | * If the inode is marked synchronous, we don't honour that here - doing | 4942 | * If the inode is marked synchronous, we don't honour that here - doing |
4936 | * so would cause a commit on atime updates, which we don't bother doing. | 4943 | * so would cause a commit on atime updates, which we don't bother doing. |
4937 | * We handle synchronous inodes at the highest possible level. | 4944 | * We handle synchronous inodes at the highest possible level. |
4938 | * | 4945 | * |
4939 | * If only the I_DIRTY_TIME flag is set, we can skip everything. If | 4946 | * If only the I_DIRTY_TIME flag is set, we can skip everything. If |
4940 | * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need | 4947 | * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need |
4941 | * to copy into the on-disk inode structure are the timestamp fields. | 4948 | * to copy into the on-disk inode structure are the timestamp fields. |
4942 | */ | 4949 | */ |
4943 | void ext4_dirty_inode(struct inode *inode, int flags) | 4950 | void ext4_dirty_inode(struct inode *inode, int flags) |
4944 | { | 4951 | { |
4945 | handle_t *handle; | 4952 | handle_t *handle; |
4946 | 4953 | ||
4947 | if (flags == I_DIRTY_TIME) | 4954 | if (flags == I_DIRTY_TIME) |
4948 | return; | 4955 | return; |
4949 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | 4956 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); |
4950 | if (IS_ERR(handle)) | 4957 | if (IS_ERR(handle)) |
4951 | goto out; | 4958 | goto out; |
4952 | 4959 | ||
4953 | ext4_mark_inode_dirty(handle, inode); | 4960 | ext4_mark_inode_dirty(handle, inode); |
4954 | 4961 | ||
4955 | ext4_journal_stop(handle); | 4962 | ext4_journal_stop(handle); |
4956 | out: | 4963 | out: |
4957 | return; | 4964 | return; |
4958 | } | 4965 | } |
4959 | 4966 | ||
4960 | #if 0 | 4967 | #if 0 |
4961 | /* | 4968 | /* |
4962 | * Bind an inode's backing buffer_head into this transaction, to prevent | 4969 | * Bind an inode's backing buffer_head into this transaction, to prevent |
4963 | * it from being flushed to disk early. Unlike | 4970 | * it from being flushed to disk early. Unlike |
4964 | * ext4_reserve_inode_write, this leaves behind no bh reference and | 4971 | * ext4_reserve_inode_write, this leaves behind no bh reference and |
4965 | * returns no iloc structure, so the caller needs to repeat the iloc | 4972 | * returns no iloc structure, so the caller needs to repeat the iloc |
4966 | * lookup to mark the inode dirty later. | 4973 | * lookup to mark the inode dirty later. |
4967 | */ | 4974 | */ |
4968 | static int ext4_pin_inode(handle_t *handle, struct inode *inode) | 4975 | static int ext4_pin_inode(handle_t *handle, struct inode *inode) |
4969 | { | 4976 | { |
4970 | struct ext4_iloc iloc; | 4977 | struct ext4_iloc iloc; |
4971 | 4978 | ||
4972 | int err = 0; | 4979 | int err = 0; |
4973 | if (handle) { | 4980 | if (handle) { |
4974 | err = ext4_get_inode_loc(inode, &iloc); | 4981 | err = ext4_get_inode_loc(inode, &iloc); |
4975 | if (!err) { | 4982 | if (!err) { |
4976 | BUFFER_TRACE(iloc.bh, "get_write_access"); | 4983 | BUFFER_TRACE(iloc.bh, "get_write_access"); |
4977 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 4984 | err = jbd2_journal_get_write_access(handle, iloc.bh); |
4978 | if (!err) | 4985 | if (!err) |
4979 | err = ext4_handle_dirty_metadata(handle, | 4986 | err = ext4_handle_dirty_metadata(handle, |
4980 | NULL, | 4987 | NULL, |
4981 | iloc.bh); | 4988 | iloc.bh); |
4982 | brelse(iloc.bh); | 4989 | brelse(iloc.bh); |
4983 | } | 4990 | } |
4984 | } | 4991 | } |
4985 | ext4_std_error(inode->i_sb, err); | 4992 | ext4_std_error(inode->i_sb, err); |
4986 | return err; | 4993 | return err; |
4987 | } | 4994 | } |
4988 | #endif | 4995 | #endif |
4989 | 4996 | ||
4990 | int ext4_change_inode_journal_flag(struct inode *inode, int val) | 4997 | int ext4_change_inode_journal_flag(struct inode *inode, int val) |
4991 | { | 4998 | { |
4992 | journal_t *journal; | 4999 | journal_t *journal; |
4993 | handle_t *handle; | 5000 | handle_t *handle; |
4994 | int err; | 5001 | int err; |
4995 | 5002 | ||
4996 | /* | 5003 | /* |
4997 | * We have to be very careful here: changing a data block's | 5004 | * We have to be very careful here: changing a data block's |
4998 | * journaling status dynamically is dangerous. If we write a | 5005 | * journaling status dynamically is dangerous. If we write a |
4999 | * data block to the journal, change the status and then delete | 5006 | * data block to the journal, change the status and then delete |
5000 | * that block, we risk forgetting to revoke the old log record | 5007 | * that block, we risk forgetting to revoke the old log record |
5001 | * from the journal and so a subsequent replay can corrupt data. | 5008 | * from the journal and so a subsequent replay can corrupt data. |
5002 | * So, first we make sure that the journal is empty and that | 5009 | * So, first we make sure that the journal is empty and that |
5003 | * nobody is changing anything. | 5010 | * nobody is changing anything. |
5004 | */ | 5011 | */ |
5005 | 5012 | ||
5006 | journal = EXT4_JOURNAL(inode); | 5013 | journal = EXT4_JOURNAL(inode); |
5007 | if (!journal) | 5014 | if (!journal) |
5008 | return 0; | 5015 | return 0; |
5009 | if (is_journal_aborted(journal)) | 5016 | if (is_journal_aborted(journal)) |
5010 | return -EROFS; | 5017 | return -EROFS; |
5011 | /* We have to allocate physical blocks for delalloc blocks | 5018 | /* We have to allocate physical blocks for delalloc blocks |
5012 | * before flushing the journal; otherwise delalloc blocks cannot | 5019 | * before flushing the journal; otherwise delalloc blocks cannot |
5013 | * be allocated any more. Worse, a truncate on delalloc blocks | 5020 | * be allocated any more. Worse, a truncate on delalloc blocks |
5014 | * could trigger a BUG by flushing delalloc blocks in the journal. | 5021 | * could trigger a BUG by flushing delalloc blocks in the journal. |
5015 | * There is no delalloc block in non-journal data mode. | 5022 | * There is no delalloc block in non-journal data mode. |
5016 | */ | 5023 | */ |
5017 | if (val && test_opt(inode->i_sb, DELALLOC)) { | 5024 | if (val && test_opt(inode->i_sb, DELALLOC)) { |
5018 | err = ext4_alloc_da_blocks(inode); | 5025 | err = ext4_alloc_da_blocks(inode); |
5019 | if (err < 0) | 5026 | if (err < 0) |
5020 | return err; | 5027 | return err; |
5021 | } | 5028 | } |
5022 | 5029 | ||
5023 | /* Wait for all existing dio workers */ | 5030 | /* Wait for all existing dio workers */ |
5024 | ext4_inode_block_unlocked_dio(inode); | 5031 | ext4_inode_block_unlocked_dio(inode); |
5025 | inode_dio_wait(inode); | 5032 | inode_dio_wait(inode); |
5026 | 5033 | ||
5027 | jbd2_journal_lock_updates(journal); | 5034 | jbd2_journal_lock_updates(journal); |
5028 | 5035 | ||
5029 | /* | 5036 | /* |
5030 | * OK, there are no updates running now, and all cached data is | 5037 | * OK, there are no updates running now, and all cached data is |
5031 | * synced to disk. We are now in a completely consistent state | 5038 | * synced to disk. We are now in a completely consistent state |
5032 | * which doesn't have anything in the journal, and we know that | 5039 | * which doesn't have anything in the journal, and we know that |
5033 | * no filesystem updates are running, so it is safe to modify | 5040 | * no filesystem updates are running, so it is safe to modify |
5034 | * the inode's in-core data-journaling state flag now. | 5041 | * the inode's in-core data-journaling state flag now. |
5035 | */ | 5042 | */ |
5036 | 5043 | ||
5037 | if (val) | 5044 | if (val) |
5038 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 5045 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5039 | else { | 5046 | else { |
5040 | err = jbd2_journal_flush(journal); | 5047 | err = jbd2_journal_flush(journal); |
5041 | if (err < 0) { | 5048 | if (err < 0) { |
5042 | jbd2_journal_unlock_updates(journal); | 5049 | jbd2_journal_unlock_updates(journal); |
5043 | ext4_inode_resume_unlocked_dio(inode); | 5050 | ext4_inode_resume_unlocked_dio(inode); |
5044 | return err; | 5051 | return err; |
5045 | } | 5052 | } |
5046 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 5053 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5047 | } | 5054 | } |
5048 | ext4_set_aops(inode); | 5055 | ext4_set_aops(inode); |
5049 | 5056 | ||
5050 | jbd2_journal_unlock_updates(journal); | 5057 | jbd2_journal_unlock_updates(journal); |
5051 | ext4_inode_resume_unlocked_dio(inode); | 5058 | ext4_inode_resume_unlocked_dio(inode); |
5052 | 5059 | ||
5053 | /* Finally we can mark the inode as dirty. */ | 5060 | /* Finally we can mark the inode as dirty. */ |
5054 | 5061 | ||
5055 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | 5062 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); |
5056 | if (IS_ERR(handle)) | 5063 | if (IS_ERR(handle)) |
5057 | return PTR_ERR(handle); | 5064 | return PTR_ERR(handle); |
5058 | 5065 | ||
5059 | err = ext4_mark_inode_dirty(handle, inode); | 5066 | err = ext4_mark_inode_dirty(handle, inode); |
5060 | ext4_handle_sync(handle); | 5067 | ext4_handle_sync(handle); |
5061 | ext4_journal_stop(handle); | 5068 | ext4_journal_stop(handle); |
5062 | ext4_std_error(inode->i_sb, err); | 5069 | ext4_std_error(inode->i_sb, err); |
5063 | 5070 | ||
5064 | return err; | 5071 | return err; |
5065 | } | 5072 | } |
5066 | 5073 | ||
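ext4_change_inode_journal_flag() is normally reached from the FS_IOC_SETFLAGS ioctl when the data-journaling attribute changes, i.e. chattr +j / chattr -j. A hypothetical userspace sketch:

	#include <sys/ioctl.h>
	#include <linux/fs.h>
	#include <fcntl.h>
	#include <unistd.h>

	/* Roughly equivalent to `chattr +j FILE`; illustrative only. */
	int set_journal_data(const char *path)
	{
		int fd = open(path, O_RDONLY);
		long flags;
		int ret = -1;

		if (fd < 0)
			return -1;
		if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
			flags |= FS_JOURNAL_DATA_FL;
			ret = ioctl(fd, FS_IOC_SETFLAGS, &flags);
		}
		close(fd);
		return ret;
	}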
5067 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | 5074 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) |
5068 | { | 5075 | { |
5069 | return !buffer_mapped(bh); | 5076 | return !buffer_mapped(bh); |
5070 | } | 5077 | } |
5071 | 5078 | ||
5072 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 5079 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
5073 | { | 5080 | { |
5074 | struct page *page = vmf->page; | 5081 | struct page *page = vmf->page; |
5075 | loff_t size; | 5082 | loff_t size; |
5076 | unsigned long len; | 5083 | unsigned long len; |
5077 | int ret; | 5084 | int ret; |
5078 | struct file *file = vma->vm_file; | 5085 | struct file *file = vma->vm_file; |
5079 | struct inode *inode = file_inode(file); | 5086 | struct inode *inode = file_inode(file); |
5080 | struct address_space *mapping = inode->i_mapping; | 5087 | struct address_space *mapping = inode->i_mapping; |
5081 | handle_t *handle; | 5088 | handle_t *handle; |
5082 | get_block_t *get_block; | 5089 | get_block_t *get_block; |
5083 | int retries = 0; | 5090 | int retries = 0; |
5084 | 5091 | ||
5085 | sb_start_pagefault(inode->i_sb); | 5092 | sb_start_pagefault(inode->i_sb); |
5086 | file_update_time(vma->vm_file); | 5093 | file_update_time(vma->vm_file); |
5087 | /* Delalloc case is easy... */ | 5094 | /* Delalloc case is easy... */ |
5088 | if (test_opt(inode->i_sb, DELALLOC) && | 5095 | if (test_opt(inode->i_sb, DELALLOC) && |
5089 | !ext4_should_journal_data(inode) && | 5096 | !ext4_should_journal_data(inode) && |
5090 | !ext4_nonda_switch(inode->i_sb)) { | 5097 | !ext4_nonda_switch(inode->i_sb)) { |
5091 | do { | 5098 | do { |
5092 | ret = __block_page_mkwrite(vma, vmf, | 5099 | ret = __block_page_mkwrite(vma, vmf, |
5093 | ext4_da_get_block_prep); | 5100 | ext4_da_get_block_prep); |
5094 | } while (ret == -ENOSPC && | 5101 | } while (ret == -ENOSPC && |
5095 | ext4_should_retry_alloc(inode->i_sb, &retries)); | 5102 | ext4_should_retry_alloc(inode->i_sb, &retries)); |
5096 | goto out_ret; | 5103 | goto out_ret; |
5097 | } | 5104 | } |
5098 | 5105 | ||
5099 | lock_page(page); | 5106 | lock_page(page); |
5100 | size = i_size_read(inode); | 5107 | size = i_size_read(inode); |
5101 | /* Page got truncated from under us? */ | 5108 | /* Page got truncated from under us? */ |
5102 | if (page->mapping != mapping || page_offset(page) > size) { | 5109 | if (page->mapping != mapping || page_offset(page) > size) { |
5103 | unlock_page(page); | 5110 | unlock_page(page); |
5104 | ret = VM_FAULT_NOPAGE; | 5111 | ret = VM_FAULT_NOPAGE; |
5105 | goto out; | 5112 | goto out; |
5106 | } | 5113 | } |
5107 | 5114 | ||
5108 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5115 | if (page->index == size >> PAGE_CACHE_SHIFT) |
5109 | len = size & ~PAGE_CACHE_MASK; | 5116 | len = size & ~PAGE_CACHE_MASK; |
5110 | else | 5117 | else |
5111 | len = PAGE_CACHE_SIZE; | 5118 | len = PAGE_CACHE_SIZE; |
5112 | /* | 5119 | /* |
5113 | * Return if we have all the buffers mapped. This avoids the need to do | 5120 | * Return if we have all the buffers mapped. This avoids the need to do |
5114 | * journal_start/journal_stop which can block and take a long time | 5121 | * journal_start/journal_stop which can block and take a long time |
5115 | */ | 5122 | */ |
5116 | if (page_has_buffers(page)) { | 5123 | if (page_has_buffers(page)) { |
5117 | if (!ext4_walk_page_buffers(NULL, page_buffers(page), | 5124 | if (!ext4_walk_page_buffers(NULL, page_buffers(page), |
5118 | 0, len, NULL, | 5125 | 0, len, NULL, |
5119 | ext4_bh_unmapped)) { | 5126 | ext4_bh_unmapped)) { |
5120 | /* Wait so that we don't change page under IO */ | 5127 | /* Wait so that we don't change page under IO */ |
5121 | wait_for_stable_page(page); | 5128 | wait_for_stable_page(page); |
5122 | ret = VM_FAULT_LOCKED; | 5129 | ret = VM_FAULT_LOCKED; |
5123 | goto out; | 5130 | goto out; |
5124 | } | 5131 | } |
5125 | } | 5132 | } |
5126 | unlock_page(page); | 5133 | unlock_page(page); |
5127 | /* OK, we need to fill the hole... */ | 5134 | /* OK, we need to fill the hole... */ |
5128 | if (ext4_should_dioread_nolock(inode)) | 5135 | if (ext4_should_dioread_nolock(inode)) |
5129 | get_block = ext4_get_block_write; | 5136 | get_block = ext4_get_block_write; |
5130 | else | 5137 | else |
5131 | get_block = ext4_get_block; | 5138 | get_block = ext4_get_block; |
5132 | retry_alloc: | 5139 | retry_alloc: |
5133 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 5140 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, |
5134 | ext4_writepage_trans_blocks(inode)); | 5141 | ext4_writepage_trans_blocks(inode)); |
5135 | if (IS_ERR(handle)) { | 5142 | if (IS_ERR(handle)) { |
5136 | ret = VM_FAULT_SIGBUS; | 5143 | ret = VM_FAULT_SIGBUS; |
5137 | goto out; | 5144 | goto out; |
5138 | } | 5145 | } |
5139 | ret = __block_page_mkwrite(vma, vmf, get_block); | 5146 | ret = __block_page_mkwrite(vma, vmf, get_block); |
5140 | if (!ret && ext4_should_journal_data(inode)) { | 5147 | if (!ret && ext4_should_journal_data(inode)) { |
5141 | if (ext4_walk_page_buffers(handle, page_buffers(page), 0, | 5148 | if (ext4_walk_page_buffers(handle, page_buffers(page), 0, |
5142 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 5149 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
5143 | unlock_page(page); | 5150 | unlock_page(page); |
5144 | ret = VM_FAULT_SIGBUS; | 5151 | ret = VM_FAULT_SIGBUS; |
5145 | ext4_journal_stop(handle); | 5152 | ext4_journal_stop(handle); |
5146 | goto out; | 5153 | goto out; |
5147 | } | 5154 | } |
5148 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 5155 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
5149 | } | 5156 | } |
5150 | ext4_journal_stop(handle); | 5157 | ext4_journal_stop(handle); |
5151 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 5158 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
5152 | goto retry_alloc; | 5159 | goto retry_alloc; |
5153 | out_ret: | 5160 | out_ret: |
5154 | ret = block_page_mkwrite_return(ret); | 5161 | ret = block_page_mkwrite_return(ret); |
5155 | out: | 5162 | out: |
5156 | sb_end_pagefault(inode->i_sb); | 5163 | sb_end_pagefault(inode->i_sb); |
5157 | return ret; | 5164 | return ret; |
5158 | } | 5165 | } |
5159 | 5166 |
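A hypothetical userspace demo of how ext4_page_mkwrite() is reached: the first store through a MAP_SHARED mapping takes a write fault into ->page_mkwrite (assumes path names a file at least one page long):

	#include <sys/mman.h>
	#include <fcntl.h>
	#include <unistd.h>

	int dirty_first_page(const char *path)
	{
		int fd = open(path, O_RDWR);
		char *p;

		if (fd < 0)
			return -1;
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (p == MAP_FAILED) {
			close(fd);
			return -1;
		}
		p[0] = 'x';	/* write fault -> ext4_page_mkwrite() */
		munmap(p, 4096);
		close(fd);
		return 0;
	}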
fs/ext4/super.c
1 | /* | 1 | /* |
2 | * linux/fs/ext4/super.c | 2 | * linux/fs/ext4/super.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1993, 1994, 1995 | 4 | * Copyright (C) 1992, 1993, 1994, 1995 |
5 | * Remy Card (card@masi.ibp.fr) | 5 | * Remy Card (card@masi.ibp.fr) |
6 | * Laboratoire MASI - Institut Blaise Pascal | 6 | * Laboratoire MASI - Institut Blaise Pascal |
7 | * Universite Pierre et Marie Curie (Paris VI) | 7 | * Universite Pierre et Marie Curie (Paris VI) |
8 | * | 8 | * |
9 | * from | 9 | * from |
10 | * | 10 | * |
11 | * linux/fs/minix/inode.c | 11 | * linux/fs/minix/inode.c |
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * Big-endian to little-endian byte-swapping/bitmaps by | 15 | * Big-endian to little-endian byte-swapping/bitmaps by |
16 | * David S. Miller (davem@caip.rutgers.edu), 1995 | 16 | * David S. Miller (davem@caip.rutgers.edu), 1995 |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/string.h> | 20 | #include <linux/string.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/jbd2.h> | 24 | #include <linux/jbd2.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/parser.h> | 28 | #include <linux/parser.h> |
29 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
30 | #include <linux/exportfs.h> | 30 | #include <linux/exportfs.h> |
31 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
32 | #include <linux/random.h> | 32 | #include <linux/random.h> |
33 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
37 | #include <linux/proc_fs.h> | 37 | #include <linux/proc_fs.h> |
38 | #include <linux/ctype.h> | 38 | #include <linux/ctype.h> |
39 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
40 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
41 | #include <linux/cleancache.h> | 41 | #include <linux/cleancache.h> |
42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
43 | 43 | ||
44 | #include <linux/kthread.h> | 44 | #include <linux/kthread.h> |
45 | #include <linux/freezer.h> | 45 | #include <linux/freezer.h> |
46 | 46 | ||
47 | #include "ext4.h" | 47 | #include "ext4.h" |
48 | #include "ext4_extents.h" /* Needed for trace points definition */ | 48 | #include "ext4_extents.h" /* Needed for trace points definition */ |
49 | #include "ext4_jbd2.h" | 49 | #include "ext4_jbd2.h" |
50 | #include "xattr.h" | 50 | #include "xattr.h" |
51 | #include "acl.h" | 51 | #include "acl.h" |
52 | #include "mballoc.h" | 52 | #include "mballoc.h" |
53 | 53 | ||
54 | #define CREATE_TRACE_POINTS | 54 | #define CREATE_TRACE_POINTS |
55 | #include <trace/events/ext4.h> | 55 | #include <trace/events/ext4.h> |
56 | 56 | ||
57 | static struct proc_dir_entry *ext4_proc_root; | 57 | static struct proc_dir_entry *ext4_proc_root; |
58 | static struct kset *ext4_kset; | 58 | static struct kset *ext4_kset; |
59 | static struct ext4_lazy_init *ext4_li_info; | 59 | static struct ext4_lazy_init *ext4_li_info; |
60 | static struct mutex ext4_li_mtx; | 60 | static struct mutex ext4_li_mtx; |
61 | static struct ext4_features *ext4_feat; | 61 | static struct ext4_features *ext4_feat; |
62 | static int ext4_mballoc_ready; | 62 | static int ext4_mballoc_ready; |
63 | 63 | ||
64 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 64 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
65 | unsigned long journal_devnum); | 65 | unsigned long journal_devnum); |
66 | static int ext4_show_options(struct seq_file *seq, struct dentry *root); | 66 | static int ext4_show_options(struct seq_file *seq, struct dentry *root); |
67 | static int ext4_commit_super(struct super_block *sb, int sync); | 67 | static int ext4_commit_super(struct super_block *sb, int sync); |
68 | static void ext4_mark_recovery_complete(struct super_block *sb, | 68 | static void ext4_mark_recovery_complete(struct super_block *sb, |
69 | struct ext4_super_block *es); | 69 | struct ext4_super_block *es); |
70 | static void ext4_clear_journal_err(struct super_block *sb, | 70 | static void ext4_clear_journal_err(struct super_block *sb, |
71 | struct ext4_super_block *es); | 71 | struct ext4_super_block *es); |
72 | static int ext4_sync_fs(struct super_block *sb, int wait); | 72 | static int ext4_sync_fs(struct super_block *sb, int wait); |
73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
75 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
76 | static int ext4_freeze(struct super_block *sb); | 76 | static int ext4_freeze(struct super_block *sb); |
77 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | 77 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
78 | const char *dev_name, void *data); | 78 | const char *dev_name, void *data); |
79 | static inline int ext2_feature_set_ok(struct super_block *sb); | 79 | static inline int ext2_feature_set_ok(struct super_block *sb); |
80 | static inline int ext3_feature_set_ok(struct super_block *sb); | 80 | static inline int ext3_feature_set_ok(struct super_block *sb); |
81 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); | 81 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); |
82 | static void ext4_destroy_lazyinit_thread(void); | 82 | static void ext4_destroy_lazyinit_thread(void); |
83 | static void ext4_unregister_li_request(struct super_block *sb); | 83 | static void ext4_unregister_li_request(struct super_block *sb); |
84 | static void ext4_clear_request_list(void); | 84 | static void ext4_clear_request_list(void); |
85 | static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); | 85 | static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); |
86 | 86 | ||
87 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 87 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
88 | static struct file_system_type ext2_fs_type = { | 88 | static struct file_system_type ext2_fs_type = { |
89 | .owner = THIS_MODULE, | 89 | .owner = THIS_MODULE, |
90 | .name = "ext2", | 90 | .name = "ext2", |
91 | .mount = ext4_mount, | 91 | .mount = ext4_mount, |
92 | .kill_sb = kill_block_super, | 92 | .kill_sb = kill_block_super, |
93 | .fs_flags = FS_REQUIRES_DEV, | 93 | .fs_flags = FS_REQUIRES_DEV, |
94 | }; | 94 | }; |
95 | MODULE_ALIAS_FS("ext2"); | 95 | MODULE_ALIAS_FS("ext2"); |
96 | MODULE_ALIAS("ext2"); | 96 | MODULE_ALIAS("ext2"); |
97 | #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) | 97 | #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) |
98 | #else | 98 | #else |
99 | #define IS_EXT2_SB(sb) (0) | 99 | #define IS_EXT2_SB(sb) (0) |
100 | #endif | 100 | #endif |
101 | 101 | ||
102 | 102 | ||
103 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 103 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
104 | static struct file_system_type ext3_fs_type = { | 104 | static struct file_system_type ext3_fs_type = { |
105 | .owner = THIS_MODULE, | 105 | .owner = THIS_MODULE, |
106 | .name = "ext3", | 106 | .name = "ext3", |
107 | .mount = ext4_mount, | 107 | .mount = ext4_mount, |
108 | .kill_sb = kill_block_super, | 108 | .kill_sb = kill_block_super, |
109 | .fs_flags = FS_REQUIRES_DEV, | 109 | .fs_flags = FS_REQUIRES_DEV, |
110 | }; | 110 | }; |
111 | MODULE_ALIAS_FS("ext3"); | 111 | MODULE_ALIAS_FS("ext3"); |
112 | MODULE_ALIAS("ext3"); | 112 | MODULE_ALIAS("ext3"); |
113 | #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) | 113 | #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) |
114 | #else | 114 | #else |
115 | #define IS_EXT3_SB(sb) (0) | 115 | #define IS_EXT3_SB(sb) (0) |
116 | #endif | 116 | #endif |
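/*
 * Reading note (annotation, not part of the file): together with the
 * ext2 stanza above, this lets ext4 register itself under the "ext2"
 * and "ext3" names when CONFIG_EXT4_USE_FOR_EXT23 is set and the
 * dedicated drivers are not built, so "mount -t ext3 ..." lands in
 * ext4_mount(). IS_EXT2_SB()/IS_EXT3_SB() later recover which
 * personality claimed a block device by checking its exclusive holder.
 */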
117 | 117 | ||
118 | static int ext4_verify_csum_type(struct super_block *sb, | 118 | static int ext4_verify_csum_type(struct super_block *sb, |
119 | struct ext4_super_block *es) | 119 | struct ext4_super_block *es) |
120 | { | 120 | { |
121 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | 121 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, |
122 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | 122 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) |
123 | return 1; | 123 | return 1; |
124 | 124 | ||
125 | return es->s_checksum_type == EXT4_CRC32C_CHKSUM; | 125 | return es->s_checksum_type == EXT4_CRC32C_CHKSUM; |
126 | } | 126 | } |
127 | 127 | ||
128 | static __le32 ext4_superblock_csum(struct super_block *sb, | 128 | static __le32 ext4_superblock_csum(struct super_block *sb, |
129 | struct ext4_super_block *es) | 129 | struct ext4_super_block *es) |
130 | { | 130 | { |
131 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 131 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
132 | int offset = offsetof(struct ext4_super_block, s_checksum); | 132 | int offset = offsetof(struct ext4_super_block, s_checksum); |
133 | __u32 csum; | 133 | __u32 csum; |
134 | 134 | ||
135 | csum = ext4_chksum(sbi, ~0, (char *)es, offset); | 135 | csum = ext4_chksum(sbi, ~0, (char *)es, offset); |
136 | 136 | ||
137 | return cpu_to_le32(csum); | 137 | return cpu_to_le32(csum); |
138 | } | 138 | } |
139 | 139 | ||
140 | static int ext4_superblock_csum_verify(struct super_block *sb, | 140 | static int ext4_superblock_csum_verify(struct super_block *sb, |
141 | struct ext4_super_block *es) | 141 | struct ext4_super_block *es) |
142 | { | 142 | { |
143 | if (!ext4_has_metadata_csum(sb)) | 143 | if (!ext4_has_metadata_csum(sb)) |
144 | return 1; | 144 | return 1; |
145 | 145 | ||
146 | return es->s_checksum == ext4_superblock_csum(sb, es); | 146 | return es->s_checksum == ext4_superblock_csum(sb, es); |
147 | } | 147 | } |
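/*
 * Worked illustration (annotation, not part of the file): the digest
 * covers every byte that precedes the s_checksum field, so the field
 * is excluded from its own checksum. A hypothetical userspace
 * verifier, assuming a crc32c() helper seeded with ~0 like the kernel
 * path, would be:
 *
 *	size_t len = offsetof(struct ext4_super_block, s_checksum);
 *	uint32_t csum = crc32c(~0, (const char *)es, len);
 *	int ok = (le32toh(es->s_checksum) == csum);
 *
 * The functions above compute the same value via ext4_chksum() and
 * the kernel crypto API.
 */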
148 | 148 | ||
149 | void ext4_superblock_csum_set(struct super_block *sb) | 149 | void ext4_superblock_csum_set(struct super_block *sb) |
150 | { | 150 | { |
151 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 151 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
152 | 152 | ||
153 | if (!ext4_has_metadata_csum(sb)) | 153 | if (!ext4_has_metadata_csum(sb)) |
154 | return; | 154 | return; |
155 | 155 | ||
156 | es->s_checksum = ext4_superblock_csum(sb, es); | 156 | es->s_checksum = ext4_superblock_csum(sb, es); |
157 | } | 157 | } |
158 | 158 | ||
159 | void *ext4_kvmalloc(size_t size, gfp_t flags) | 159 | void *ext4_kvmalloc(size_t size, gfp_t flags) |
160 | { | 160 | { |
161 | void *ret; | 161 | void *ret; |
162 | 162 | ||
163 | ret = kmalloc(size, flags | __GFP_NOWARN); | 163 | ret = kmalloc(size, flags | __GFP_NOWARN); |
164 | if (!ret) | 164 | if (!ret) |
165 | ret = __vmalloc(size, flags, PAGE_KERNEL); | 165 | ret = __vmalloc(size, flags, PAGE_KERNEL); |
166 | return ret; | 166 | return ret; |
167 | } | 167 | } |
168 | 168 | ||
169 | void *ext4_kvzalloc(size_t size, gfp_t flags) | 169 | void *ext4_kvzalloc(size_t size, gfp_t flags) |
170 | { | 170 | { |
171 | void *ret; | 171 | void *ret; |
172 | 172 | ||
173 | ret = kzalloc(size, flags | __GFP_NOWARN); | 173 | ret = kzalloc(size, flags | __GFP_NOWARN); |
174 | if (!ret) | 174 | if (!ret) |
175 | ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); | 175 | ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); |
176 | return ret; | 176 | return ret; |
177 | } | 177 | } |
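/*
 * Usage sketch (hypothetical caller): both helpers try a physically
 * contiguous kmalloc()/kzalloc() first and fall back to vmalloc() when
 * memory is too fragmented, which is why __GFP_NOWARN is ORed in --
 * the first-attempt failure is expected and not worth a stack trace.
 * Either result must be released with kvfree(), which picks the right
 * free path:
 *
 *	ptr = ext4_kvzalloc(size, GFP_KERNEL);
 *	if (!ptr)
 *		return -ENOMEM;
 *	...
 *	kvfree(ptr);
 */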
178 | 178 | ||
179 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, | 179 | ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, |
180 | struct ext4_group_desc *bg) | 180 | struct ext4_group_desc *bg) |
181 | { | 181 | { |
182 | return le32_to_cpu(bg->bg_block_bitmap_lo) | | 182 | return le32_to_cpu(bg->bg_block_bitmap_lo) | |
183 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 183 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
184 | (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); | 184 | (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); |
185 | } | 185 | } |
186 | 186 | ||
187 | ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, | 187 | ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, |
188 | struct ext4_group_desc *bg) | 188 | struct ext4_group_desc *bg) |
189 | { | 189 | { |
190 | return le32_to_cpu(bg->bg_inode_bitmap_lo) | | 190 | return le32_to_cpu(bg->bg_inode_bitmap_lo) | |
191 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 191 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
192 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); | 192 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); |
193 | } | 193 | } |
194 | 194 | ||
195 | ext4_fsblk_t ext4_inode_table(struct super_block *sb, | 195 | ext4_fsblk_t ext4_inode_table(struct super_block *sb, |
196 | struct ext4_group_desc *bg) | 196 | struct ext4_group_desc *bg) |
197 | { | 197 | { |
198 | return le32_to_cpu(bg->bg_inode_table_lo) | | 198 | return le32_to_cpu(bg->bg_inode_table_lo) | |
199 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 199 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
200 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); | 200 | (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); |
201 | } | 201 | } |
202 | 202 | ||
203 | __u32 ext4_free_group_clusters(struct super_block *sb, | 203 | __u32 ext4_free_group_clusters(struct super_block *sb, |
204 | struct ext4_group_desc *bg) | 204 | struct ext4_group_desc *bg) |
205 | { | 205 | { |
206 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | | 206 | return le16_to_cpu(bg->bg_free_blocks_count_lo) | |
207 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 207 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
208 | (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); | 208 | (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); |
209 | } | 209 | } |
210 | 210 | ||
211 | __u32 ext4_free_inodes_count(struct super_block *sb, | 211 | __u32 ext4_free_inodes_count(struct super_block *sb, |
212 | struct ext4_group_desc *bg) | 212 | struct ext4_group_desc *bg) |
213 | { | 213 | { |
214 | return le16_to_cpu(bg->bg_free_inodes_count_lo) | | 214 | return le16_to_cpu(bg->bg_free_inodes_count_lo) | |
215 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 215 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
216 | (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); | 216 | (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); |
217 | } | 217 | } |
218 | 218 | ||
219 | __u32 ext4_used_dirs_count(struct super_block *sb, | 219 | __u32 ext4_used_dirs_count(struct super_block *sb, |
220 | struct ext4_group_desc *bg) | 220 | struct ext4_group_desc *bg) |
221 | { | 221 | { |
222 | return le16_to_cpu(bg->bg_used_dirs_count_lo) | | 222 | return le16_to_cpu(bg->bg_used_dirs_count_lo) | |
223 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 223 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
224 | (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); | 224 | (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); |
225 | } | 225 | } |
226 | 226 | ||
227 | __u32 ext4_itable_unused_count(struct super_block *sb, | 227 | __u32 ext4_itable_unused_count(struct super_block *sb, |
228 | struct ext4_group_desc *bg) | 228 | struct ext4_group_desc *bg) |
229 | { | 229 | { |
230 | return le16_to_cpu(bg->bg_itable_unused_lo) | | 230 | return le16_to_cpu(bg->bg_itable_unused_lo) | |
231 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? | 231 | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? |
232 | (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); | 232 | (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); |
233 | } | 233 | } |
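/*
 * Reading note: all of the getters above share one pattern -- a value
 * is stored as a little-endian _lo word plus a _hi word that is only
 * meaningful when the group descriptor uses the larger 64-bit layout.
 * Schematically, for the block addresses (32+32 bits) and the
 * counters (16+16 bits):
 *
 *	u64 blk   = lo32 | ((u64)hi32 << 32);
 *	u32 count = lo16 | ((u32)hi16 << 16);
 *
 * The setters below are the mirror image: the low word is written
 * unconditionally, the high word only on 64-bit descriptors.
 */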
234 | 234 | ||
235 | void ext4_block_bitmap_set(struct super_block *sb, | 235 | void ext4_block_bitmap_set(struct super_block *sb, |
236 | struct ext4_group_desc *bg, ext4_fsblk_t blk) | 236 | struct ext4_group_desc *bg, ext4_fsblk_t blk) |
237 | { | 237 | { |
238 | bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); | 238 | bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk); |
239 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 239 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
240 | bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); | 240 | bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); |
241 | } | 241 | } |
242 | 242 | ||
243 | void ext4_inode_bitmap_set(struct super_block *sb, | 243 | void ext4_inode_bitmap_set(struct super_block *sb, |
244 | struct ext4_group_desc *bg, ext4_fsblk_t blk) | 244 | struct ext4_group_desc *bg, ext4_fsblk_t blk) |
245 | { | 245 | { |
246 | bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); | 246 | bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk); |
247 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 247 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
248 | bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); | 248 | bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); |
249 | } | 249 | } |
250 | 250 | ||
251 | void ext4_inode_table_set(struct super_block *sb, | 251 | void ext4_inode_table_set(struct super_block *sb, |
252 | struct ext4_group_desc *bg, ext4_fsblk_t blk) | 252 | struct ext4_group_desc *bg, ext4_fsblk_t blk) |
253 | { | 253 | { |
254 | bg->bg_inode_table_lo = cpu_to_le32((u32)blk); | 254 | bg->bg_inode_table_lo = cpu_to_le32((u32)blk); |
255 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 255 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
256 | bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); | 256 | bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); |
257 | } | 257 | } |
258 | 258 | ||
259 | void ext4_free_group_clusters_set(struct super_block *sb, | 259 | void ext4_free_group_clusters_set(struct super_block *sb, |
260 | struct ext4_group_desc *bg, __u32 count) | 260 | struct ext4_group_desc *bg, __u32 count) |
261 | { | 261 | { |
262 | bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); | 262 | bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); |
263 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 263 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
264 | bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); | 264 | bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16); |
265 | } | 265 | } |
266 | 266 | ||
267 | void ext4_free_inodes_set(struct super_block *sb, | 267 | void ext4_free_inodes_set(struct super_block *sb, |
268 | struct ext4_group_desc *bg, __u32 count) | 268 | struct ext4_group_desc *bg, __u32 count) |
269 | { | 269 | { |
270 | bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); | 270 | bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count); |
271 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 271 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
272 | bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); | 272 | bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16); |
273 | } | 273 | } |
274 | 274 | ||
275 | void ext4_used_dirs_set(struct super_block *sb, | 275 | void ext4_used_dirs_set(struct super_block *sb, |
276 | struct ext4_group_desc *bg, __u32 count) | 276 | struct ext4_group_desc *bg, __u32 count) |
277 | { | 277 | { |
278 | bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); | 278 | bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count); |
279 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 279 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
280 | bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); | 280 | bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16); |
281 | } | 281 | } |
282 | 282 | ||
283 | void ext4_itable_unused_set(struct super_block *sb, | 283 | void ext4_itable_unused_set(struct super_block *sb, |
284 | struct ext4_group_desc *bg, __u32 count) | 284 | struct ext4_group_desc *bg, __u32 count) |
285 | { | 285 | { |
286 | bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); | 286 | bg->bg_itable_unused_lo = cpu_to_le16((__u16)count); |
287 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) | 287 | if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) |
288 | bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); | 288 | bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); |
289 | } | 289 | } |
290 | 290 | ||
291 | 291 | ||
292 | static void __save_error_info(struct super_block *sb, const char *func, | 292 | static void __save_error_info(struct super_block *sb, const char *func, |
293 | unsigned int line) | 293 | unsigned int line) |
294 | { | 294 | { |
295 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 295 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
296 | 296 | ||
297 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 297 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
298 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 298 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
299 | es->s_last_error_time = cpu_to_le32(get_seconds()); | 299 | es->s_last_error_time = cpu_to_le32(get_seconds()); |
300 | strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); | 300 | strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); |
301 | es->s_last_error_line = cpu_to_le32(line); | 301 | es->s_last_error_line = cpu_to_le32(line); |
302 | if (!es->s_first_error_time) { | 302 | if (!es->s_first_error_time) { |
303 | es->s_first_error_time = es->s_last_error_time; | 303 | es->s_first_error_time = es->s_last_error_time; |
304 | strncpy(es->s_first_error_func, func, | 304 | strncpy(es->s_first_error_func, func, |
305 | sizeof(es->s_first_error_func)); | 305 | sizeof(es->s_first_error_func)); |
306 | es->s_first_error_line = cpu_to_le32(line); | 306 | es->s_first_error_line = cpu_to_le32(line); |
307 | es->s_first_error_ino = es->s_last_error_ino; | 307 | es->s_first_error_ino = es->s_last_error_ino; |
308 | es->s_first_error_block = es->s_last_error_block; | 308 | es->s_first_error_block = es->s_last_error_block; |
309 | } | 309 | } |
310 | /* | 310 | /* |
311 | * Start the daily error reporting function if it hasn't been | 311 | * Start the daily error reporting function if it hasn't been |
312 | * started already | 312 | * started already |
313 | */ | 313 | */ |
314 | if (!es->s_error_count) | 314 | if (!es->s_error_count) |
315 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); | 315 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); |
316 | le32_add_cpu(&es->s_error_count, 1); | 316 | le32_add_cpu(&es->s_error_count, 1); |
317 | } | 317 | } |
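/*
 * Reading note: s_last_error_* is overwritten on every failure, while
 * the s_first_error_* block is filled in exactly once (guarded by
 * s_first_error_time == 0), so the superblock records both the oldest
 * and the newest error site. The 24*60*60*HZ above is simply one day
 * expressed in jiffies, arming the daily error-count report on the
 * first error only.
 */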
318 | 318 | ||
319 | static void save_error_info(struct super_block *sb, const char *func, | 319 | static void save_error_info(struct super_block *sb, const char *func, |
320 | unsigned int line) | 320 | unsigned int line) |
321 | { | 321 | { |
322 | __save_error_info(sb, func, line); | 322 | __save_error_info(sb, func, line); |
323 | ext4_commit_super(sb, 1); | 323 | ext4_commit_super(sb, 1); |
324 | } | 324 | } |
325 | 325 | ||
326 | /* | 326 | /* |
327 | * The del_gendisk() function uninitializes the disk-specific data | 327 | * The del_gendisk() function uninitializes the disk-specific data |
328 | * structures, including the bdi structure, without telling anyone | 328 | * structures, including the bdi structure, without telling anyone |
329 | * else. Once this happens, any attempt to call mark_buffer_dirty() | 329 | * else. Once this happens, any attempt to call mark_buffer_dirty() |
330 | * (for example, by ext4_commit_super) will cause a kernel OOPS. | 330 | * (for example, by ext4_commit_super) will cause a kernel OOPS. |
331 | * This is a kludge to prevent these oopses until we can put in a proper | 331 | * This is a kludge to prevent these oopses until we can put in a proper |
332 | * hook in del_gendisk() to inform the VFS and file system layers. | 332 | * hook in del_gendisk() to inform the VFS and file system layers. |
333 | */ | 333 | */ |
334 | static int block_device_ejected(struct super_block *sb) | 334 | static int block_device_ejected(struct super_block *sb) |
335 | { | 335 | { |
336 | struct inode *bd_inode = sb->s_bdev->bd_inode; | 336 | struct inode *bd_inode = sb->s_bdev->bd_inode; |
337 | struct backing_dev_info *bdi = inode_to_bdi(bd_inode); | 337 | struct backing_dev_info *bdi = inode_to_bdi(bd_inode); |
338 | 338 | ||
339 | return bdi->dev == NULL; | 339 | return bdi->dev == NULL; |
340 | } | 340 | } |
341 | 341 | ||
342 | static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) | 342 | static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) |
343 | { | 343 | { |
344 | struct super_block *sb = journal->j_private; | 344 | struct super_block *sb = journal->j_private; |
345 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 345 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
346 | int error = is_journal_aborted(journal); | 346 | int error = is_journal_aborted(journal); |
347 | struct ext4_journal_cb_entry *jce; | 347 | struct ext4_journal_cb_entry *jce; |
348 | 348 | ||
349 | BUG_ON(txn->t_state == T_FINISHED); | 349 | BUG_ON(txn->t_state == T_FINISHED); |
350 | spin_lock(&sbi->s_md_lock); | 350 | spin_lock(&sbi->s_md_lock); |
351 | while (!list_empty(&txn->t_private_list)) { | 351 | while (!list_empty(&txn->t_private_list)) { |
352 | jce = list_entry(txn->t_private_list.next, | 352 | jce = list_entry(txn->t_private_list.next, |
353 | struct ext4_journal_cb_entry, jce_list); | 353 | struct ext4_journal_cb_entry, jce_list); |
354 | list_del_init(&jce->jce_list); | 354 | list_del_init(&jce->jce_list); |
355 | spin_unlock(&sbi->s_md_lock); | 355 | spin_unlock(&sbi->s_md_lock); |
356 | jce->jce_func(sb, jce, error); | 356 | jce->jce_func(sb, jce, error); |
357 | spin_lock(&sbi->s_md_lock); | 357 | spin_lock(&sbi->s_md_lock); |
358 | } | 358 | } |
359 | spin_unlock(&sbi->s_md_lock); | 359 | spin_unlock(&sbi->s_md_lock); |
360 | } | 360 | } |
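/*
 * Note the lock dance above: s_md_lock is dropped around each
 * jce_func() invocation, so a callback can take other locks or block
 * without holding the spinlock; list_del_init() detaches the entry
 * first, keeping the private list consistent across the unlocked
 * window.
 */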
361 | 361 | ||
362 | /* Deal with the reporting of failure conditions on a filesystem, such | 362 | /* Deal with the reporting of failure conditions on a filesystem, such |
363 | * as detected inconsistencies or read IO failures. | 363 | * as detected inconsistencies or read IO failures. |
364 | * | 364 | * |
365 | * On ext2, we can store the error state of the filesystem in the | 365 | * On ext2, we can store the error state of the filesystem in the |
366 | * superblock. That is not possible on ext4, because we may have other | 366 | * superblock. That is not possible on ext4, because we may have other |
367 | * write ordering constraints on the superblock which prevent us from | 367 | * write ordering constraints on the superblock which prevent us from |
368 | * writing it out straight away; and given that the journal is about to | 368 | * writing it out straight away; and given that the journal is about to |
369 | * be aborted, we can't rely on the current, or future, transactions to | 369 | * be aborted, we can't rely on the current, or future, transactions to |
370 | * write out the superblock safely. | 370 | * write out the superblock safely. |
371 | * | 371 | * |
372 | * We'll just use the jbd2_journal_abort() error code to record an error in | 372 | * We'll just use the jbd2_journal_abort() error code to record an error in |
373 | * the journal instead. On recovery, the journal will complain about | 373 | * the journal instead. On recovery, the journal will complain about |
374 | * that error until we've noted it down and cleared it. | 374 | * that error until we've noted it down and cleared it. |
375 | */ | 375 | */ |
376 | 376 | ||
377 | static void ext4_handle_error(struct super_block *sb) | 377 | static void ext4_handle_error(struct super_block *sb) |
378 | { | 378 | { |
379 | if (sb->s_flags & MS_RDONLY) | 379 | if (sb->s_flags & MS_RDONLY) |
380 | return; | 380 | return; |
381 | 381 | ||
382 | if (!test_opt(sb, ERRORS_CONT)) { | 382 | if (!test_opt(sb, ERRORS_CONT)) { |
383 | journal_t *journal = EXT4_SB(sb)->s_journal; | 383 | journal_t *journal = EXT4_SB(sb)->s_journal; |
384 | 384 | ||
385 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | 385 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
386 | if (journal) | 386 | if (journal) |
387 | jbd2_journal_abort(journal, -EIO); | 387 | jbd2_journal_abort(journal, -EIO); |
388 | } | 388 | } |
389 | if (test_opt(sb, ERRORS_RO)) { | 389 | if (test_opt(sb, ERRORS_RO)) { |
390 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 390 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
391 | /* | 391 | /* |
392 | * Make sure updated value of ->s_mount_flags will be visible | 392 | * Make sure updated value of ->s_mount_flags will be visible |
393 | * before ->s_flags update | 393 | * before ->s_flags update |
394 | */ | 394 | */ |
395 | smp_wmb(); | 395 | smp_wmb(); |
396 | sb->s_flags |= MS_RDONLY; | 396 | sb->s_flags |= MS_RDONLY; |
397 | } | 397 | } |
398 | if (test_opt(sb, ERRORS_PANIC)) | 398 | if (test_opt(sb, ERRORS_PANIC)) |
399 | panic("EXT4-fs (device %s): panic forced after error\n", | 399 | panic("EXT4-fs (device %s): panic forced after error\n", |
400 | sb->s_id); | 400 | sb->s_id); |
401 | } | 401 | } |
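/*
 * Behaviour sketch for the three errors= mount policies dispatched
 * above:
 *
 *	errors=continue   - record the error, keep running
 *	errors=remount-ro - abort the journal, then flip the
 *	                    superblock to MS_RDONLY
 *	errors=panic      - abort the journal, then panic()
 *
 * e.g. "mount -o errors=remount-ro /dev/sdb1 /mnt" (device name is
 * illustrative) selects the middle policy.
 */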
402 | 402 | ||
403 | #define ext4_error_ratelimit(sb) \ | 403 | #define ext4_error_ratelimit(sb) \ |
404 | ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ | 404 | ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ |
405 | "EXT4-fs error") | 405 | "EXT4-fs error") |
406 | 406 | ||
407 | void __ext4_error(struct super_block *sb, const char *function, | 407 | void __ext4_error(struct super_block *sb, const char *function, |
408 | unsigned int line, const char *fmt, ...) | 408 | unsigned int line, const char *fmt, ...) |
409 | { | 409 | { |
410 | struct va_format vaf; | 410 | struct va_format vaf; |
411 | va_list args; | 411 | va_list args; |
412 | 412 | ||
413 | if (ext4_error_ratelimit(sb)) { | 413 | if (ext4_error_ratelimit(sb)) { |
414 | va_start(args, fmt); | 414 | va_start(args, fmt); |
415 | vaf.fmt = fmt; | 415 | vaf.fmt = fmt; |
416 | vaf.va = &args; | 416 | vaf.va = &args; |
417 | printk(KERN_CRIT | 417 | printk(KERN_CRIT |
418 | "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", | 418 | "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", |
419 | sb->s_id, function, line, current->comm, &vaf); | 419 | sb->s_id, function, line, current->comm, &vaf); |
420 | va_end(args); | 420 | va_end(args); |
421 | } | 421 | } |
422 | save_error_info(sb, function, line); | 422 | save_error_info(sb, function, line); |
423 | ext4_handle_error(sb); | 423 | ext4_handle_error(sb); |
424 | } | 424 | } |
425 | 425 | ||
426 | void __ext4_error_inode(struct inode *inode, const char *function, | 426 | void __ext4_error_inode(struct inode *inode, const char *function, |
427 | unsigned int line, ext4_fsblk_t block, | 427 | unsigned int line, ext4_fsblk_t block, |
428 | const char *fmt, ...) | 428 | const char *fmt, ...) |
429 | { | 429 | { |
430 | va_list args; | 430 | va_list args; |
431 | struct va_format vaf; | 431 | struct va_format vaf; |
432 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 432 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
433 | 433 | ||
434 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | 434 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); |
435 | es->s_last_error_block = cpu_to_le64(block); | 435 | es->s_last_error_block = cpu_to_le64(block); |
436 | if (ext4_error_ratelimit(inode->i_sb)) { | 436 | if (ext4_error_ratelimit(inode->i_sb)) { |
437 | va_start(args, fmt); | 437 | va_start(args, fmt); |
438 | vaf.fmt = fmt; | 438 | vaf.fmt = fmt; |
439 | vaf.va = &args; | 439 | vaf.va = &args; |
440 | if (block) | 440 | if (block) |
441 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " | 441 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " |
442 | "inode #%lu: block %llu: comm %s: %pV\n", | 442 | "inode #%lu: block %llu: comm %s: %pV\n", |
443 | inode->i_sb->s_id, function, line, inode->i_ino, | 443 | inode->i_sb->s_id, function, line, inode->i_ino, |
444 | block, current->comm, &vaf); | 444 | block, current->comm, &vaf); |
445 | else | 445 | else |
446 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " | 446 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " |
447 | "inode #%lu: comm %s: %pV\n", | 447 | "inode #%lu: comm %s: %pV\n", |
448 | inode->i_sb->s_id, function, line, inode->i_ino, | 448 | inode->i_sb->s_id, function, line, inode->i_ino, |
449 | current->comm, &vaf); | 449 | current->comm, &vaf); |
450 | va_end(args); | 450 | va_end(args); |
451 | } | 451 | } |
452 | save_error_info(inode->i_sb, function, line); | 452 | save_error_info(inode->i_sb, function, line); |
453 | ext4_handle_error(inode->i_sb); | 453 | ext4_handle_error(inode->i_sb); |
454 | } | 454 | } |
455 | 455 | ||
456 | void __ext4_error_file(struct file *file, const char *function, | 456 | void __ext4_error_file(struct file *file, const char *function, |
457 | unsigned int line, ext4_fsblk_t block, | 457 | unsigned int line, ext4_fsblk_t block, |
458 | const char *fmt, ...) | 458 | const char *fmt, ...) |
459 | { | 459 | { |
460 | va_list args; | 460 | va_list args; |
461 | struct va_format vaf; | 461 | struct va_format vaf; |
462 | struct ext4_super_block *es; | 462 | struct ext4_super_block *es; |
463 | struct inode *inode = file_inode(file); | 463 | struct inode *inode = file_inode(file); |
464 | char pathname[80], *path; | 464 | char pathname[80], *path; |
465 | 465 | ||
466 | es = EXT4_SB(inode->i_sb)->s_es; | 466 | es = EXT4_SB(inode->i_sb)->s_es; |
467 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | 467 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); |
468 | if (ext4_error_ratelimit(inode->i_sb)) { | 468 | if (ext4_error_ratelimit(inode->i_sb)) { |
469 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | 469 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); |
470 | if (IS_ERR(path)) | 470 | if (IS_ERR(path)) |
471 | path = "(unknown)"; | 471 | path = "(unknown)"; |
472 | va_start(args, fmt); | 472 | va_start(args, fmt); |
473 | vaf.fmt = fmt; | 473 | vaf.fmt = fmt; |
474 | vaf.va = &args; | 474 | vaf.va = &args; |
475 | if (block) | 475 | if (block) |
476 | printk(KERN_CRIT | 476 | printk(KERN_CRIT |
477 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " | 477 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " |
478 | "block %llu: comm %s: path %s: %pV\n", | 478 | "block %llu: comm %s: path %s: %pV\n", |
479 | inode->i_sb->s_id, function, line, inode->i_ino, | 479 | inode->i_sb->s_id, function, line, inode->i_ino, |
480 | block, current->comm, path, &vaf); | 480 | block, current->comm, path, &vaf); |
481 | else | 481 | else |
482 | printk(KERN_CRIT | 482 | printk(KERN_CRIT |
483 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " | 483 | "EXT4-fs error (device %s): %s:%d: inode #%lu: " |
484 | "comm %s: path %s: %pV\n", | 484 | "comm %s: path %s: %pV\n", |
485 | inode->i_sb->s_id, function, line, inode->i_ino, | 485 | inode->i_sb->s_id, function, line, inode->i_ino, |
486 | current->comm, path, &vaf); | 486 | current->comm, path, &vaf); |
487 | va_end(args); | 487 | va_end(args); |
488 | } | 488 | } |
489 | save_error_info(inode->i_sb, function, line); | 489 | save_error_info(inode->i_sb, function, line); |
490 | ext4_handle_error(inode->i_sb); | 490 | ext4_handle_error(inode->i_sb); |
491 | } | 491 | } |
492 | 492 | ||
493 | const char *ext4_decode_error(struct super_block *sb, int errno, | 493 | const char *ext4_decode_error(struct super_block *sb, int errno, |
494 | char nbuf[16]) | 494 | char nbuf[16]) |
495 | { | 495 | { |
496 | char *errstr = NULL; | 496 | char *errstr = NULL; |
497 | 497 | ||
498 | switch (errno) { | 498 | switch (errno) { |
499 | case -EIO: | 499 | case -EIO: |
500 | errstr = "IO failure"; | 500 | errstr = "IO failure"; |
501 | break; | 501 | break; |
502 | case -ENOMEM: | 502 | case -ENOMEM: |
503 | errstr = "Out of memory"; | 503 | errstr = "Out of memory"; |
504 | break; | 504 | break; |
505 | case -EROFS: | 505 | case -EROFS: |
506 | if (!sb || (EXT4_SB(sb)->s_journal && | 506 | if (!sb || (EXT4_SB(sb)->s_journal && |
507 | EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) | 507 | EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) |
508 | errstr = "Journal has aborted"; | 508 | errstr = "Journal has aborted"; |
509 | else | 509 | else |
510 | errstr = "Readonly filesystem"; | 510 | errstr = "Readonly filesystem"; |
511 | break; | 511 | break; |
512 | default: | 512 | default: |
513 | /* If the caller passed in an extra buffer for unknown | 513 | /* If the caller passed in an extra buffer for unknown |
514 | * errors, textualise them now. Else we just return | 514 | * errors, textualise them now. Else we just return |
515 | * NULL. */ | 515 | * NULL. */ |
516 | if (nbuf) { | 516 | if (nbuf) { |
517 | /* Check for truncated error codes... */ | 517 | /* Check for truncated error codes... */ |
518 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | 518 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) |
519 | errstr = nbuf; | 519 | errstr = nbuf; |
520 | } | 520 | } |
521 | break; | 521 | break; |
522 | } | 522 | } |
523 | 523 | ||
524 | return errstr; | 524 | return errstr; |
525 | } | 525 | } |
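/*
 * Usage sketch: callers supply a small scratch buffer so unknown
 * errno values can still be rendered, e.g.
 *
 *	char nbuf[16];
 *	const char *s = ext4_decode_error(sb, err, nbuf);
 *
 * yielding "IO failure", "Out of memory", "Readonly filesystem", or
 * "error %d"-style text for anything unrecognised. __ext4_std_error()
 * below is the canonical caller of exactly this shape.
 */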
526 | 526 | ||
527 | /* __ext4_std_error decodes expected errors from journaling functions | 527 | /* __ext4_std_error decodes expected errors from journaling functions |
528 | * automatically and invokes the appropriate error response. */ | 528 | * automatically and invokes the appropriate error response. */ |
529 | 529 | ||
530 | void __ext4_std_error(struct super_block *sb, const char *function, | 530 | void __ext4_std_error(struct super_block *sb, const char *function, |
531 | unsigned int line, int errno) | 531 | unsigned int line, int errno) |
532 | { | 532 | { |
533 | char nbuf[16]; | 533 | char nbuf[16]; |
534 | const char *errstr; | 534 | const char *errstr; |
535 | 535 | ||
536 | /* Special case: if the error is EROFS, and we're not already | 536 | /* Special case: if the error is EROFS, and we're not already |
537 | * inside a transaction, then there's really no point in logging | 537 | * inside a transaction, then there's really no point in logging |
538 | * an error. */ | 538 | * an error. */ |
539 | if (errno == -EROFS && journal_current_handle() == NULL && | 539 | if (errno == -EROFS && journal_current_handle() == NULL && |
540 | (sb->s_flags & MS_RDONLY)) | 540 | (sb->s_flags & MS_RDONLY)) |
541 | return; | 541 | return; |
542 | 542 | ||
543 | if (ext4_error_ratelimit(sb)) { | 543 | if (ext4_error_ratelimit(sb)) { |
544 | errstr = ext4_decode_error(sb, errno, nbuf); | 544 | errstr = ext4_decode_error(sb, errno, nbuf); |
545 | printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", | 545 | printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", |
546 | sb->s_id, function, line, errstr); | 546 | sb->s_id, function, line, errstr); |
547 | } | 547 | } |
548 | 548 | ||
549 | save_error_info(sb, function, line); | 549 | save_error_info(sb, function, line); |
550 | ext4_handle_error(sb); | 550 | ext4_handle_error(sb); |
551 | } | 551 | } |
552 | 552 | ||
553 | /* | 553 | /* |
554 | * ext4_abort is a much stronger failure handler than ext4_error. The | 554 | * ext4_abort is a much stronger failure handler than ext4_error. The |
555 | * abort function may be used to deal with unrecoverable failures such | 555 | * abort function may be used to deal with unrecoverable failures such |
556 | * as journal IO errors or ENOMEM at a critical moment in log management. | 556 | * as journal IO errors or ENOMEM at a critical moment in log management. |
557 | * | 557 | * |
558 | * We unconditionally force the filesystem into an ABORT|READONLY state, | 558 | * We unconditionally force the filesystem into an ABORT|READONLY state, |
559 | * unless the error response on the fs has been set to panic in which | 559 | * unless the error response on the fs has been set to panic in which |
560 | * case we take the easy way out and panic immediately. | 560 | * case we take the easy way out and panic immediately. |
561 | */ | 561 | */ |
562 | 562 | ||
563 | void __ext4_abort(struct super_block *sb, const char *function, | 563 | void __ext4_abort(struct super_block *sb, const char *function, |
564 | unsigned int line, const char *fmt, ...) | 564 | unsigned int line, const char *fmt, ...) |
565 | { | 565 | { |
566 | va_list args; | 566 | va_list args; |
567 | 567 | ||
568 | save_error_info(sb, function, line); | 568 | save_error_info(sb, function, line); |
569 | va_start(args, fmt); | 569 | va_start(args, fmt); |
570 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, | 570 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, |
571 | function, line); | 571 | function, line); |
572 | vprintk(fmt, args); | 572 | vprintk(fmt, args); |
573 | printk("\n"); | 573 | printk("\n"); |
574 | va_end(args); | 574 | va_end(args); |
575 | 575 | ||
576 | if ((sb->s_flags & MS_RDONLY) == 0) { | 576 | if ((sb->s_flags & MS_RDONLY) == 0) { |
577 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 577 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
578 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | 578 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
579 | /* | 579 | /* |
580 | * Make sure updated value of ->s_mount_flags will be visible | 580 | * Make sure updated value of ->s_mount_flags will be visible |
581 | * before ->s_flags update | 581 | * before ->s_flags update |
582 | */ | 582 | */ |
583 | smp_wmb(); | 583 | smp_wmb(); |
584 | sb->s_flags |= MS_RDONLY; | 584 | sb->s_flags |= MS_RDONLY; |
585 | if (EXT4_SB(sb)->s_journal) | 585 | if (EXT4_SB(sb)->s_journal) |
586 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 586 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
587 | save_error_info(sb, function, line); | 587 | save_error_info(sb, function, line); |
588 | } | 588 | } |
589 | if (test_opt(sb, ERRORS_PANIC)) | 589 | if (test_opt(sb, ERRORS_PANIC)) |
590 | panic("EXT4-fs panic from previous error\n"); | 590 | panic("EXT4-fs panic from previous error\n"); |
591 | } | 591 | } |
592 | 592 | ||
593 | void __ext4_msg(struct super_block *sb, | 593 | void __ext4_msg(struct super_block *sb, |
594 | const char *prefix, const char *fmt, ...) | 594 | const char *prefix, const char *fmt, ...) |
595 | { | 595 | { |
596 | struct va_format vaf; | 596 | struct va_format vaf; |
597 | va_list args; | 597 | va_list args; |
598 | 598 | ||
599 | if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) | 599 | if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) |
600 | return; | 600 | return; |
601 | 601 | ||
602 | va_start(args, fmt); | 602 | va_start(args, fmt); |
603 | vaf.fmt = fmt; | 603 | vaf.fmt = fmt; |
604 | vaf.va = &args; | 604 | vaf.va = &args; |
605 | printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); | 605 | printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf); |
606 | va_end(args); | 606 | va_end(args); |
607 | } | 607 | } |
608 | 608 | ||
609 | void __ext4_warning(struct super_block *sb, const char *function, | 609 | void __ext4_warning(struct super_block *sb, const char *function, |
610 | unsigned int line, const char *fmt, ...) | 610 | unsigned int line, const char *fmt, ...) |
611 | { | 611 | { |
612 | struct va_format vaf; | 612 | struct va_format vaf; |
613 | va_list args; | 613 | va_list args; |
614 | 614 | ||
615 | if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), | 615 | if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), |
616 | "EXT4-fs warning")) | 616 | "EXT4-fs warning")) |
617 | return; | 617 | return; |
618 | 618 | ||
619 | va_start(args, fmt); | 619 | va_start(args, fmt); |
620 | vaf.fmt = fmt; | 620 | vaf.fmt = fmt; |
621 | vaf.va = &args; | 621 | vaf.va = &args; |
622 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", | 622 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", |
623 | sb->s_id, function, line, &vaf); | 623 | sb->s_id, function, line, &vaf); |
624 | va_end(args); | 624 | va_end(args); |
625 | } | 625 | } |
626 | 626 | ||
627 | void __ext4_grp_locked_error(const char *function, unsigned int line, | 627 | void __ext4_grp_locked_error(const char *function, unsigned int line, |
628 | struct super_block *sb, ext4_group_t grp, | 628 | struct super_block *sb, ext4_group_t grp, |
629 | unsigned long ino, ext4_fsblk_t block, | 629 | unsigned long ino, ext4_fsblk_t block, |
630 | const char *fmt, ...) | 630 | const char *fmt, ...) |
631 | __releases(bitlock) | 631 | __releases(bitlock) |
632 | __acquires(bitlock) | 632 | __acquires(bitlock) |
633 | { | 633 | { |
634 | struct va_format vaf; | 634 | struct va_format vaf; |
635 | va_list args; | 635 | va_list args; |
636 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 636 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
637 | 637 | ||
638 | es->s_last_error_ino = cpu_to_le32(ino); | 638 | es->s_last_error_ino = cpu_to_le32(ino); |
639 | es->s_last_error_block = cpu_to_le64(block); | 639 | es->s_last_error_block = cpu_to_le64(block); |
640 | __save_error_info(sb, function, line); | 640 | __save_error_info(sb, function, line); |
641 | 641 | ||
642 | if (ext4_error_ratelimit(sb)) { | 642 | if (ext4_error_ratelimit(sb)) { |
643 | va_start(args, fmt); | 643 | va_start(args, fmt); |
644 | vaf.fmt = fmt; | 644 | vaf.fmt = fmt; |
645 | vaf.va = &args; | 645 | vaf.va = &args; |
646 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", | 646 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", |
647 | sb->s_id, function, line, grp); | 647 | sb->s_id, function, line, grp); |
648 | if (ino) | 648 | if (ino) |
649 | printk(KERN_CONT "inode %lu: ", ino); | 649 | printk(KERN_CONT "inode %lu: ", ino); |
650 | if (block) | 650 | if (block) |
651 | printk(KERN_CONT "block %llu:", | 651 | printk(KERN_CONT "block %llu:", |
652 | (unsigned long long) block); | 652 | (unsigned long long) block); |
653 | printk(KERN_CONT "%pV\n", &vaf); | 653 | printk(KERN_CONT "%pV\n", &vaf); |
654 | va_end(args); | 654 | va_end(args); |
655 | } | 655 | } |
656 | 656 | ||
657 | if (test_opt(sb, ERRORS_CONT)) { | 657 | if (test_opt(sb, ERRORS_CONT)) { |
658 | ext4_commit_super(sb, 0); | 658 | ext4_commit_super(sb, 0); |
659 | return; | 659 | return; |
660 | } | 660 | } |
661 | 661 | ||
662 | ext4_unlock_group(sb, grp); | 662 | ext4_unlock_group(sb, grp); |
663 | ext4_handle_error(sb); | 663 | ext4_handle_error(sb); |
664 | /* | 664 | /* |
665 | * We only get here in the ERRORS_RO case; relocking the group | 665 | * We only get here in the ERRORS_RO case; relocking the group |
666 | * may be dangerous, but nothing bad will happen since the | 666 | * may be dangerous, but nothing bad will happen since the |
667 | * filesystem will have already been marked read-only and the | 667 | * filesystem will have already been marked read-only and the |
668 | * journal has been aborted. We return 1 as a hint to callers | 668 | * journal has been aborted. We return 1 as a hint to callers |
669 | * who might want to use the return value from | 669 | * who might want to use the return value from |
670 | * ext4_grp_locked_error() to distinguish between the | 670 | * ext4_grp_locked_error() to distinguish between the |
671 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more | 671 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more |
672 | * aggressively from the ext4 function in question, with a | 672 | * aggressively from the ext4 function in question, with a |
673 | * more appropriate error code. | 673 | * more appropriate error code. |
674 | */ | 674 | */ |
675 | ext4_lock_group(sb, grp); | 675 | ext4_lock_group(sb, grp); |
676 | return; | 676 | return; |
677 | } | 677 | } |
678 | 678 | ||
679 | void ext4_update_dynamic_rev(struct super_block *sb) | 679 | void ext4_update_dynamic_rev(struct super_block *sb) |
680 | { | 680 | { |
681 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 681 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
682 | 682 | ||
683 | if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) | 683 | if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) |
684 | return; | 684 | return; |
685 | 685 | ||
686 | ext4_warning(sb, | 686 | ext4_warning(sb, |
687 | "updating to rev %d because of new feature flag, " | 687 | "updating to rev %d because of new feature flag, " |
688 | "running e2fsck is recommended", | 688 | "running e2fsck is recommended", |
689 | EXT4_DYNAMIC_REV); | 689 | EXT4_DYNAMIC_REV); |
690 | 690 | ||
691 | es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); | 691 | es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); |
692 | es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); | 692 | es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); |
693 | es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); | 693 | es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); |
694 | /* leave es->s_feature_*compat flags alone */ | 694 | /* leave es->s_feature_*compat flags alone */ |
695 | /* es->s_uuid will be set by e2fsck if empty */ | 695 | /* es->s_uuid will be set by e2fsck if empty */ |
696 | 696 | ||
697 | /* | 697 | /* |
698 | * The rest of the superblock fields should be zero, and if not it | 698 | * The rest of the superblock fields should be zero, and if not it |
699 | * means they are likely already in use, so leave them alone. We | 699 | * means they are likely already in use, so leave them alone. We |
700 | * can leave it up to e2fsck to clean up any inconsistencies there. | 700 | * can leave it up to e2fsck to clean up any inconsistencies there. |
701 | */ | 701 | */ |
702 | } | 702 | } |
703 | 703 | ||
704 | /* | 704 | /* |
705 | * Open the external journal device | 705 | * Open the external journal device |
706 | */ | 706 | */ |
707 | static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) | 707 | static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) |
708 | { | 708 | { |
709 | struct block_device *bdev; | 709 | struct block_device *bdev; |
710 | char b[BDEVNAME_SIZE]; | 710 | char b[BDEVNAME_SIZE]; |
711 | 711 | ||
712 | bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); | 712 | bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); |
713 | if (IS_ERR(bdev)) | 713 | if (IS_ERR(bdev)) |
714 | goto fail; | 714 | goto fail; |
715 | return bdev; | 715 | return bdev; |
716 | 716 | ||
717 | fail: | 717 | fail: |
718 | ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", | 718 | ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", |
719 | __bdevname(dev, b), PTR_ERR(bdev)); | 719 | __bdevname(dev, b), PTR_ERR(bdev)); |
720 | return NULL; | 720 | return NULL; |
721 | } | 721 | } |
722 | 722 | ||
723 | /* | 723 | /* |
724 | * Release the journal device | 724 | * Release the journal device |
725 | */ | 725 | */ |
726 | static void ext4_blkdev_put(struct block_device *bdev) | 726 | static void ext4_blkdev_put(struct block_device *bdev) |
727 | { | 727 | { |
728 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | 728 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
729 | } | 729 | } |
730 | 730 | ||
731 | static void ext4_blkdev_remove(struct ext4_sb_info *sbi) | 731 | static void ext4_blkdev_remove(struct ext4_sb_info *sbi) |
732 | { | 732 | { |
733 | struct block_device *bdev; | 733 | struct block_device *bdev; |
734 | bdev = sbi->journal_bdev; | 734 | bdev = sbi->journal_bdev; |
735 | if (bdev) { | 735 | if (bdev) { |
736 | ext4_blkdev_put(bdev); | 736 | ext4_blkdev_put(bdev); |
737 | sbi->journal_bdev = NULL; | 737 | sbi->journal_bdev = NULL; |
738 | } | 738 | } |
739 | } | 739 | } |
740 | 740 | ||
741 | static inline struct inode *orphan_list_entry(struct list_head *l) | 741 | static inline struct inode *orphan_list_entry(struct list_head *l) |
742 | { | 742 | { |
743 | return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; | 743 | return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode; |
744 | } | 744 | } |
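/*
 * The usual container_of() idiom: the list node handed in is the
 * i_orphan member embedded in an ext4_inode_info, so list_entry()
 * recovers the surrounding ext4_inode_info and we hand back its
 * embedded VFS inode.
 */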
745 | 745 | ||
746 | static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) | 746 | static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) |
747 | { | 747 | { |
748 | struct list_head *l; | 748 | struct list_head *l; |
749 | 749 | ||
750 | ext4_msg(sb, KERN_ERR, "sb orphan head is %d", | 750 | ext4_msg(sb, KERN_ERR, "sb orphan head is %d", |
751 | le32_to_cpu(sbi->s_es->s_last_orphan)); | 751 | le32_to_cpu(sbi->s_es->s_last_orphan)); |
752 | 752 | ||
753 | printk(KERN_ERR "sb_info orphan list:\n"); | 753 | printk(KERN_ERR "sb_info orphan list:\n"); |
754 | list_for_each(l, &sbi->s_orphan) { | 754 | list_for_each(l, &sbi->s_orphan) { |
755 | struct inode *inode = orphan_list_entry(l); | 755 | struct inode *inode = orphan_list_entry(l); |
756 | printk(KERN_ERR " " | 756 | printk(KERN_ERR " " |
757 | "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", | 757 | "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", |
758 | inode->i_sb->s_id, inode->i_ino, inode, | 758 | inode->i_sb->s_id, inode->i_ino, inode, |
759 | inode->i_mode, inode->i_nlink, | 759 | inode->i_mode, inode->i_nlink, |
760 | NEXT_ORPHAN(inode)); | 760 | NEXT_ORPHAN(inode)); |
761 | } | 761 | } |
762 | } | 762 | } |
763 | 763 | ||
764 | static void ext4_put_super(struct super_block *sb) | 764 | static void ext4_put_super(struct super_block *sb) |
765 | { | 765 | { |
766 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 766 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
767 | struct ext4_super_block *es = sbi->s_es; | 767 | struct ext4_super_block *es = sbi->s_es; |
768 | int i, err; | 768 | int i, err; |
769 | 769 | ||
770 | ext4_unregister_li_request(sb); | 770 | ext4_unregister_li_request(sb); |
771 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 771 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
772 | 772 | ||
773 | flush_workqueue(sbi->rsv_conversion_wq); | 773 | flush_workqueue(sbi->rsv_conversion_wq); |
774 | destroy_workqueue(sbi->rsv_conversion_wq); | 774 | destroy_workqueue(sbi->rsv_conversion_wq); |
775 | 775 | ||
776 | if (sbi->s_journal) { | 776 | if (sbi->s_journal) { |
777 | err = jbd2_journal_destroy(sbi->s_journal); | 777 | err = jbd2_journal_destroy(sbi->s_journal); |
778 | sbi->s_journal = NULL; | 778 | sbi->s_journal = NULL; |
779 | if (err < 0) | 779 | if (err < 0) |
780 | ext4_abort(sb, "Couldn't clean up the journal"); | 780 | ext4_abort(sb, "Couldn't clean up the journal"); |
781 | } | 781 | } |
782 | 782 | ||
783 | ext4_es_unregister_shrinker(sbi); | 783 | ext4_es_unregister_shrinker(sbi); |
784 | del_timer_sync(&sbi->s_err_report); | 784 | del_timer_sync(&sbi->s_err_report); |
785 | ext4_release_system_zone(sb); | 785 | ext4_release_system_zone(sb); |
786 | ext4_mb_release(sb); | 786 | ext4_mb_release(sb); |
787 | ext4_ext_release(sb); | 787 | ext4_ext_release(sb); |
788 | ext4_xattr_put_super(sb); | 788 | ext4_xattr_put_super(sb); |
789 | 789 | ||
790 | if (!(sb->s_flags & MS_RDONLY)) { | 790 | if (!(sb->s_flags & MS_RDONLY)) { |
791 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 791 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
792 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 792 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
793 | } | 793 | } |
794 | if (!(sb->s_flags & MS_RDONLY)) | 794 | if (!(sb->s_flags & MS_RDONLY)) |
795 | ext4_commit_super(sb, 1); | 795 | ext4_commit_super(sb, 1); |
796 | 796 | ||
797 | if (sbi->s_proc) { | 797 | if (sbi->s_proc) { |
798 | remove_proc_entry("options", sbi->s_proc); | 798 | remove_proc_entry("options", sbi->s_proc); |
799 | remove_proc_entry(sb->s_id, ext4_proc_root); | 799 | remove_proc_entry(sb->s_id, ext4_proc_root); |
800 | } | 800 | } |
801 | kobject_del(&sbi->s_kobj); | 801 | kobject_del(&sbi->s_kobj); |
802 | 802 | ||
803 | for (i = 0; i < sbi->s_gdb_count; i++) | 803 | for (i = 0; i < sbi->s_gdb_count; i++) |
804 | brelse(sbi->s_group_desc[i]); | 804 | brelse(sbi->s_group_desc[i]); |
805 | kvfree(sbi->s_group_desc); | 805 | kvfree(sbi->s_group_desc); |
806 | kvfree(sbi->s_flex_groups); | 806 | kvfree(sbi->s_flex_groups); |
807 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 807 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
808 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 808 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
809 | percpu_counter_destroy(&sbi->s_dirs_counter); | 809 | percpu_counter_destroy(&sbi->s_dirs_counter); |
810 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); | 810 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
811 | brelse(sbi->s_sbh); | 811 | brelse(sbi->s_sbh); |
812 | #ifdef CONFIG_QUOTA | 812 | #ifdef CONFIG_QUOTA |
813 | for (i = 0; i < EXT4_MAXQUOTAS; i++) | 813 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
814 | kfree(sbi->s_qf_names[i]); | 814 | kfree(sbi->s_qf_names[i]); |
815 | #endif | 815 | #endif |
816 | 816 | ||
817 | /* Debugging code just in case the in-memory inode orphan list | 817 | /* Debugging code just in case the in-memory inode orphan list |
818 | * isn't empty. The on-disk one can be non-empty if we've | 818 | * isn't empty. The on-disk one can be non-empty if we've |
819 | * detected an error and taken the fs readonly, but the | 819 | * detected an error and taken the fs readonly, but the |
820 | * in-memory list had better be clean by this point. */ | 820 | * in-memory list had better be clean by this point. */ |
821 | if (!list_empty(&sbi->s_orphan)) | 821 | if (!list_empty(&sbi->s_orphan)) |
822 | dump_orphan_list(sb, sbi); | 822 | dump_orphan_list(sb, sbi); |
823 | J_ASSERT(list_empty(&sbi->s_orphan)); | 823 | J_ASSERT(list_empty(&sbi->s_orphan)); |
824 | 824 | ||
825 | invalidate_bdev(sb->s_bdev); | 825 | invalidate_bdev(sb->s_bdev); |
826 | if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { | 826 | if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { |
827 | /* | 827 | /* |
828 | * Invalidate the journal device's buffers. We don't want them | 828 | * Invalidate the journal device's buffers. We don't want them |
829 | * floating about in memory - the physical journal device may | 829 | * floating about in memory - the physical journal device may |
830 | * be hotswapped, and it breaks the `ro-after' testing code. | 830 | * be hotswapped, and it breaks the `ro-after' testing code. |
831 | */ | 831 | */ |
832 | sync_blockdev(sbi->journal_bdev); | 832 | sync_blockdev(sbi->journal_bdev); |
833 | invalidate_bdev(sbi->journal_bdev); | 833 | invalidate_bdev(sbi->journal_bdev); |
834 | ext4_blkdev_remove(sbi); | 834 | ext4_blkdev_remove(sbi); |
835 | } | 835 | } |
836 | if (sbi->s_mb_cache) { | 836 | if (sbi->s_mb_cache) { |
837 | ext4_xattr_destroy_cache(sbi->s_mb_cache); | 837 | ext4_xattr_destroy_cache(sbi->s_mb_cache); |
838 | sbi->s_mb_cache = NULL; | 838 | sbi->s_mb_cache = NULL; |
839 | } | 839 | } |
840 | if (sbi->s_mmp_tsk) | 840 | if (sbi->s_mmp_tsk) |
841 | kthread_stop(sbi->s_mmp_tsk); | 841 | kthread_stop(sbi->s_mmp_tsk); |
842 | sb->s_fs_info = NULL; | 842 | sb->s_fs_info = NULL; |
843 | /* | 843 | /* |
844 | * Now that we are completely done shutting down the | 844 | * Now that we are completely done shutting down the |
845 | * superblock, we need to actually destroy the kobject. | 845 | * superblock, we need to actually destroy the kobject. |
846 | */ | 846 | */ |
847 | kobject_put(&sbi->s_kobj); | 847 | kobject_put(&sbi->s_kobj); |
848 | wait_for_completion(&sbi->s_kobj_unregister); | 848 | wait_for_completion(&sbi->s_kobj_unregister); |
849 | if (sbi->s_chksum_driver) | 849 | if (sbi->s_chksum_driver) |
850 | crypto_free_shash(sbi->s_chksum_driver); | 850 | crypto_free_shash(sbi->s_chksum_driver); |
851 | kfree(sbi->s_blockgroup_lock); | 851 | kfree(sbi->s_blockgroup_lock); |
852 | kfree(sbi); | 852 | kfree(sbi); |
853 | } | 853 | } |
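/*
 * Teardown-order note: the kobject_put()/wait_for_completion() pair
 * near the end of ext4_put_super() makes sure the sysfs release
 * callback has actually run before sbi itself is kfree()d; freeing
 * sbi first would leave a registered kobject pointing at freed
 * memory.
 */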
854 | 854 | ||
855 | static struct kmem_cache *ext4_inode_cachep; | 855 | static struct kmem_cache *ext4_inode_cachep; |
856 | 856 | ||
857 | /* | 857 | /* |
858 | * Called inside a transaction, so use GFP_NOFS | 858 | * Called inside a transaction, so use GFP_NOFS |
859 | */ | 859 | */ |
860 | static struct inode *ext4_alloc_inode(struct super_block *sb) | 860 | static struct inode *ext4_alloc_inode(struct super_block *sb) |
861 | { | 861 | { |
862 | struct ext4_inode_info *ei; | 862 | struct ext4_inode_info *ei; |
863 | 863 | ||
864 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 864 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
865 | if (!ei) | 865 | if (!ei) |
866 | return NULL; | 866 | return NULL; |
867 | 867 | ||
868 | ei->vfs_inode.i_version = 1; | 868 | ei->vfs_inode.i_version = 1; |
869 | spin_lock_init(&ei->i_raw_lock); | 869 | spin_lock_init(&ei->i_raw_lock); |
870 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 870 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
871 | spin_lock_init(&ei->i_prealloc_lock); | 871 | spin_lock_init(&ei->i_prealloc_lock); |
872 | ext4_es_init_tree(&ei->i_es_tree); | 872 | ext4_es_init_tree(&ei->i_es_tree); |
873 | rwlock_init(&ei->i_es_lock); | 873 | rwlock_init(&ei->i_es_lock); |
874 | INIT_LIST_HEAD(&ei->i_es_list); | 874 | INIT_LIST_HEAD(&ei->i_es_list); |
875 | ei->i_es_all_nr = 0; | 875 | ei->i_es_all_nr = 0; |
876 | ei->i_es_shk_nr = 0; | 876 | ei->i_es_shk_nr = 0; |
877 | ei->i_es_shrink_lblk = 0; | 877 | ei->i_es_shrink_lblk = 0; |
878 | ei->i_reserved_data_blocks = 0; | 878 | ei->i_reserved_data_blocks = 0; |
879 | ei->i_reserved_meta_blocks = 0; | 879 | ei->i_reserved_meta_blocks = 0; |
880 | ei->i_allocated_meta_blocks = 0; | 880 | ei->i_allocated_meta_blocks = 0; |
881 | ei->i_da_metadata_calc_len = 0; | 881 | ei->i_da_metadata_calc_len = 0; |
882 | ei->i_da_metadata_calc_last_lblock = 0; | 882 | ei->i_da_metadata_calc_last_lblock = 0; |
883 | spin_lock_init(&(ei->i_block_reservation_lock)); | 883 | spin_lock_init(&(ei->i_block_reservation_lock)); |
884 | #ifdef CONFIG_QUOTA | 884 | #ifdef CONFIG_QUOTA |
885 | ei->i_reserved_quota = 0; | 885 | ei->i_reserved_quota = 0; |
886 | memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); | 886 | memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); |
887 | #endif | 887 | #endif |
888 | ei->jinode = NULL; | 888 | ei->jinode = NULL; |
889 | INIT_LIST_HEAD(&ei->i_rsv_conversion_list); | 889 | INIT_LIST_HEAD(&ei->i_rsv_conversion_list); |
890 | spin_lock_init(&ei->i_completed_io_lock); | 890 | spin_lock_init(&ei->i_completed_io_lock); |
891 | ei->i_sync_tid = 0; | 891 | ei->i_sync_tid = 0; |
892 | ei->i_datasync_tid = 0; | 892 | ei->i_datasync_tid = 0; |
893 | atomic_set(&ei->i_ioend_count, 0); | 893 | atomic_set(&ei->i_ioend_count, 0); |
894 | atomic_set(&ei->i_unwritten, 0); | 894 | atomic_set(&ei->i_unwritten, 0); |
895 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); | 895 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); |
896 | 896 | ||
897 | return &ei->vfs_inode; | 897 | return &ei->vfs_inode; |
898 | } | 898 | } |
899 | 899 | ||
900 | static int ext4_drop_inode(struct inode *inode) | 900 | static int ext4_drop_inode(struct inode *inode) |
901 | { | 901 | { |
902 | int drop = generic_drop_inode(inode); | 902 | int drop = generic_drop_inode(inode); |
903 | 903 | ||
904 | trace_ext4_drop_inode(inode, drop); | 904 | trace_ext4_drop_inode(inode, drop); |
905 | return drop; | 905 | return drop; |
906 | } | 906 | } |
907 | 907 | ||
908 | static void ext4_i_callback(struct rcu_head *head) | 908 | static void ext4_i_callback(struct rcu_head *head) |
909 | { | 909 | { |
910 | struct inode *inode = container_of(head, struct inode, i_rcu); | 910 | struct inode *inode = container_of(head, struct inode, i_rcu); |
911 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); | 911 | kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); |
912 | } | 912 | } |
913 | 913 | ||
914 | static void ext4_destroy_inode(struct inode *inode) | 914 | static void ext4_destroy_inode(struct inode *inode) |
915 | { | 915 | { |
916 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 916 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
917 | ext4_msg(inode->i_sb, KERN_ERR, | 917 | ext4_msg(inode->i_sb, KERN_ERR, |
918 | "Inode %lu (%p): orphan list check failed!", | 918 | "Inode %lu (%p): orphan list check failed!", |
919 | inode->i_ino, EXT4_I(inode)); | 919 | inode->i_ino, EXT4_I(inode)); |
920 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, | 920 | print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, |
921 | EXT4_I(inode), sizeof(struct ext4_inode_info), | 921 | EXT4_I(inode), sizeof(struct ext4_inode_info), |
922 | true); | 922 | true); |
923 | dump_stack(); | 923 | dump_stack(); |
924 | } | 924 | } |
925 | call_rcu(&inode->i_rcu, ext4_i_callback); | 925 | call_rcu(&inode->i_rcu, ext4_i_callback); |
926 | } | 926 | } |
927 | 927 | ||
928 | static void init_once(void *foo) | 928 | static void init_once(void *foo) |
929 | { | 929 | { |
930 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; | 930 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; |
931 | 931 | ||
932 | INIT_LIST_HEAD(&ei->i_orphan); | 932 | INIT_LIST_HEAD(&ei->i_orphan); |
933 | init_rwsem(&ei->xattr_sem); | 933 | init_rwsem(&ei->xattr_sem); |
934 | init_rwsem(&ei->i_data_sem); | 934 | init_rwsem(&ei->i_data_sem); |
935 | inode_init_once(&ei->vfs_inode); | 935 | inode_init_once(&ei->vfs_inode); |
936 | } | 936 | } |
937 | 937 | ||
938 | static int __init init_inodecache(void) | 938 | static int __init init_inodecache(void) |
939 | { | 939 | { |
940 | ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", | 940 | ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", |
941 | sizeof(struct ext4_inode_info), | 941 | sizeof(struct ext4_inode_info), |
942 | 0, (SLAB_RECLAIM_ACCOUNT| | 942 | 0, (SLAB_RECLAIM_ACCOUNT| |
943 | SLAB_MEM_SPREAD), | 943 | SLAB_MEM_SPREAD), |
944 | init_once); | 944 | init_once); |
945 | if (ext4_inode_cachep == NULL) | 945 | if (ext4_inode_cachep == NULL) |
946 | return -ENOMEM; | 946 | return -ENOMEM; |
947 | return 0; | 947 | return 0; |
948 | } | 948 | } |
949 | 949 | ||
950 | static void destroy_inodecache(void) | 950 | static void destroy_inodecache(void) |
951 | { | 951 | { |
952 | /* | 952 | /* |
953 | * Make sure all delayed rcu free inodes are flushed before we | 953 | * Make sure all delayed rcu free inodes are flushed before we |
954 | * destroy cache. | 954 | * destroy cache. |
955 | */ | 955 | */ |
956 | rcu_barrier(); | 956 | rcu_barrier(); |
957 | kmem_cache_destroy(ext4_inode_cachep); | 957 | kmem_cache_destroy(ext4_inode_cachep); |
958 | } | 958 | } |
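The alloc/ctor split above is easy to misread: init_once() is a slab constructor, invoked a single time when a slot first enters the cache, so only state that must survive a free/re-alloc cycle (list heads, rwsems) belongs there, while ext4_alloc_inode() re-initializes everything else on every allocation. Likewise destroy_inodecache() must rcu_barrier() first, because ext4_destroy_inode() frees through call_rcu() and the cache cannot be torn down while callbacks are still queued. Below is a minimal userspace sketch of the constructor-once idea; all names are illustrative, and none of this is kernel code.

/*
 * Minimal userspace sketch (not kernel code) of the slab "constructor
 * once" pattern: ctor() runs a single time when a slot first comes
 * into existence, so only fields whose state survives an alloc/free
 * cycle belong there; everything else is re-initialized on every
 * allocation, as ext4_alloc_inode() does.
 */
#include <stdbool.h>
#include <stdio.h>

struct obj {
	int persistent;	/* set by the constructor, survives free/alloc */
	int per_use;	/* must be reset on every allocation */
};

#define POOL_SIZE 4
static struct obj pool[POOL_SIZE];
static bool constructed[POOL_SIZE], in_use[POOL_SIZE];

static void ctor(struct obj *o)		/* analogue of init_once() */
{
	o->persistent = 42;
}

static struct obj *pool_alloc(void)	/* analogue of ext4_alloc_inode() */
{
	for (int i = 0; i < POOL_SIZE; i++) {
		if (in_use[i])
			continue;
		if (!constructed[i]) {	/* ctor runs once per slot */
			ctor(&pool[i]);
			constructed[i] = true;
		}
		in_use[i] = true;
		pool[i].per_use = 0;	/* per-allocation reset */
		return &pool[i];
	}
	return NULL;
}

static void pool_free(struct obj *o)
{
	in_use[o - pool] = false;	/* ctor is NOT re-run on reuse */
}

int main(void)
{
	struct obj *o = pool_alloc();
	o->per_use = 7;
	pool_free(o);
	o = pool_alloc();	/* same slot: persistent kept, per_use reset */
	printf("%d %d\n", o->persistent, o->per_use);	/* prints: 42 0 */
	return 0;
}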
959 | 959 | ||
960 | void ext4_clear_inode(struct inode *inode) | 960 | void ext4_clear_inode(struct inode *inode) |
961 | { | 961 | { |
962 | invalidate_inode_buffers(inode); | 962 | invalidate_inode_buffers(inode); |
963 | clear_inode(inode); | 963 | clear_inode(inode); |
964 | dquot_drop(inode); | 964 | dquot_drop(inode); |
965 | ext4_discard_preallocations(inode); | 965 | ext4_discard_preallocations(inode); |
966 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | 966 | ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); |
967 | if (EXT4_I(inode)->jinode) { | 967 | if (EXT4_I(inode)->jinode) { |
968 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), | 968 | jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), |
969 | EXT4_I(inode)->jinode); | 969 | EXT4_I(inode)->jinode); |
970 | jbd2_free_inode(EXT4_I(inode)->jinode); | 970 | jbd2_free_inode(EXT4_I(inode)->jinode); |
971 | EXT4_I(inode)->jinode = NULL; | 971 | EXT4_I(inode)->jinode = NULL; |
972 | } | 972 | } |
973 | } | 973 | } |
974 | 974 | ||
975 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, | 975 | static struct inode *ext4_nfs_get_inode(struct super_block *sb, |
976 | u64 ino, u32 generation) | 976 | u64 ino, u32 generation) |
977 | { | 977 | { |
978 | struct inode *inode; | 978 | struct inode *inode; |
979 | 979 | ||
980 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) | 980 | if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) |
981 | return ERR_PTR(-ESTALE); | 981 | return ERR_PTR(-ESTALE); |
982 | if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) | 982 | if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) |
983 | return ERR_PTR(-ESTALE); | 983 | return ERR_PTR(-ESTALE); |
984 | 984 | ||
985 | /* iget isn't really right if the inode is currently unallocated!! | 985 | /* iget isn't really right if the inode is currently unallocated!! |
986 | * | 986 | * |
987 | * ext4_read_inode will return a bad_inode if the inode had been | 987 | * ext4_read_inode will return a bad_inode if the inode had been |
988 | * deleted, so we should be safe. | 988 | * deleted, so we should be safe. |
989 | * | 989 | * |
990 | * Currently we don't know the generation for parent directory, so | 990 | * Currently we don't know the generation for parent directory, so |
991 | * a generation of 0 means "accept any" | 991 | * a generation of 0 means "accept any" |
992 | */ | 992 | */ |
993 | inode = ext4_iget_normal(sb, ino); | 993 | inode = ext4_iget_normal(sb, ino); |
994 | if (IS_ERR(inode)) | 994 | if (IS_ERR(inode)) |
995 | return ERR_CAST(inode); | 995 | return ERR_CAST(inode); |
996 | if (generation && inode->i_generation != generation) { | 996 | if (generation && inode->i_generation != generation) { |
997 | iput(inode); | 997 | iput(inode); |
998 | return ERR_PTR(-ESTALE); | 998 | return ERR_PTR(-ESTALE); |
999 | } | 999 | } |
1000 | 1000 | ||
1001 | return inode; | 1001 | return inode; |
1002 | } | 1002 | } |
1003 | 1003 | ||
1004 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, | 1004 | static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1005 | int fh_len, int fh_type) | 1005 | int fh_len, int fh_type) |
1006 | { | 1006 | { |
1007 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | 1007 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, |
1008 | ext4_nfs_get_inode); | 1008 | ext4_nfs_get_inode); |
1009 | } | 1009 | } |
1010 | 1010 | ||
1011 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | 1011 | static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, |
1012 | int fh_len, int fh_type) | 1012 | int fh_len, int fh_type) |
1013 | { | 1013 | { |
1014 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | 1014 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, |
1015 | ext4_nfs_get_inode); | 1015 | ext4_nfs_get_inode); |
1016 | } | 1016 | } |
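The generation check in ext4_nfs_get_inode() is what turns a file handle held by an NFS client across inode reuse into -ESTALE, rather than silent access to whatever file now occupies that inode number. A hypothetical userspace sketch of that check follows; the types, table, and values are demo inventions, not the kernel's.

/*
 * Illustrative sketch of the staleness check in ext4_nfs_get_inode():
 * the inode must exist, and a non-zero generation in the handle must
 * match the inode's current generation (inode numbers are reused,
 * generations are not). Demo types and table only.
 */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_inode { uint64_t ino; uint32_t generation; };

static struct demo_inode itable[] = {
	{ 2, 1 },	/* root */
	{ 12, 7 },	/* a reused inode, now on generation 7 */
};

static int fh_to_inode(uint64_t ino, uint32_t gen, struct demo_inode **out)
{
	for (size_t i = 0; i < sizeof(itable) / sizeof(itable[0]); i++) {
		if (itable[i].ino != ino)
			continue;
		if (gen && itable[i].generation != gen)
			return -ESTALE;	/* handle predates inode reuse */
		*out = &itable[i];
		return 0;
	}
	return -ESTALE;
}

int main(void)
{
	struct demo_inode *inode;

	/* a client holding generation 6 of inode 12 gets -ESTALE */
	printf("%d\n", fh_to_inode(12, 6, &inode));
	/* generation 0 means "accept any", as the comment above notes */
	printf("%d\n", fh_to_inode(12, 0, &inode));
	return 0;
}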
1017 | 1017 | ||
1018 | /* | 1018 | /* |
1019 | * Try to release metadata pages (indirect blocks, directories) which are | 1019 | * Try to release metadata pages (indirect blocks, directories) which are |
1020 | * mapped via the block device. Since these pages could have journal heads | 1020 | * mapped via the block device. Since these pages could have journal heads |
1021 | * which would prevent try_to_free_buffers() from freeing them, we must use | 1021 | * which would prevent try_to_free_buffers() from freeing them, we must use |
1022 | * jbd2 layer's try_to_free_buffers() function to release them. | 1022 | * jbd2 layer's try_to_free_buffers() function to release them. |
1023 | */ | 1023 | */ |
1024 | static int bdev_try_to_free_page(struct super_block *sb, struct page *page, | 1024 | static int bdev_try_to_free_page(struct super_block *sb, struct page *page, |
1025 | gfp_t wait) | 1025 | gfp_t wait) |
1026 | { | 1026 | { |
1027 | journal_t *journal = EXT4_SB(sb)->s_journal; | 1027 | journal_t *journal = EXT4_SB(sb)->s_journal; |
1028 | 1028 | ||
1029 | WARN_ON(PageChecked(page)); | 1029 | WARN_ON(PageChecked(page)); |
1030 | if (!page_has_buffers(page)) | 1030 | if (!page_has_buffers(page)) |
1031 | return 0; | 1031 | return 0; |
1032 | if (journal) | 1032 | if (journal) |
1033 | return jbd2_journal_try_to_free_buffers(journal, page, | 1033 | return jbd2_journal_try_to_free_buffers(journal, page, |
1034 | wait & ~__GFP_WAIT); | 1034 | wait & ~__GFP_WAIT); |
1035 | return try_to_free_buffers(page); | 1035 | return try_to_free_buffers(page); |
1036 | } | 1036 | } |
1037 | 1037 | ||
1038 | #ifdef CONFIG_QUOTA | 1038 | #ifdef CONFIG_QUOTA |
1039 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") | 1039 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") |
1040 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 1040 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
1041 | 1041 | ||
1042 | static int ext4_write_dquot(struct dquot *dquot); | 1042 | static int ext4_write_dquot(struct dquot *dquot); |
1043 | static int ext4_acquire_dquot(struct dquot *dquot); | 1043 | static int ext4_acquire_dquot(struct dquot *dquot); |
1044 | static int ext4_release_dquot(struct dquot *dquot); | 1044 | static int ext4_release_dquot(struct dquot *dquot); |
1045 | static int ext4_mark_dquot_dirty(struct dquot *dquot); | 1045 | static int ext4_mark_dquot_dirty(struct dquot *dquot); |
1046 | static int ext4_write_info(struct super_block *sb, int type); | 1046 | static int ext4_write_info(struct super_block *sb, int type); |
1047 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1047 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1048 | struct path *path); | 1048 | struct path *path); |
1049 | static int ext4_quota_off(struct super_block *sb, int type); | 1049 | static int ext4_quota_off(struct super_block *sb, int type); |
1050 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1050 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1051 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1051 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
1052 | size_t len, loff_t off); | 1052 | size_t len, loff_t off); |
1053 | static ssize_t ext4_quota_write(struct super_block *sb, int type, | 1053 | static ssize_t ext4_quota_write(struct super_block *sb, int type, |
1054 | const char *data, size_t len, loff_t off); | 1054 | const char *data, size_t len, loff_t off); |
1055 | static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | 1055 | static int ext4_quota_enable(struct super_block *sb, int type, int format_id, |
1056 | unsigned int flags); | 1056 | unsigned int flags); |
1057 | static int ext4_enable_quotas(struct super_block *sb); | 1057 | static int ext4_enable_quotas(struct super_block *sb); |
1058 | 1058 | ||
1059 | static struct dquot **ext4_get_dquots(struct inode *inode) | 1059 | static struct dquot **ext4_get_dquots(struct inode *inode) |
1060 | { | 1060 | { |
1061 | return EXT4_I(inode)->i_dquot; | 1061 | return EXT4_I(inode)->i_dquot; |
1062 | } | 1062 | } |
1063 | 1063 | ||
1064 | static const struct dquot_operations ext4_quota_operations = { | 1064 | static const struct dquot_operations ext4_quota_operations = { |
1065 | .get_reserved_space = ext4_get_reserved_space, | 1065 | .get_reserved_space = ext4_get_reserved_space, |
1066 | .write_dquot = ext4_write_dquot, | 1066 | .write_dquot = ext4_write_dquot, |
1067 | .acquire_dquot = ext4_acquire_dquot, | 1067 | .acquire_dquot = ext4_acquire_dquot, |
1068 | .release_dquot = ext4_release_dquot, | 1068 | .release_dquot = ext4_release_dquot, |
1069 | .mark_dirty = ext4_mark_dquot_dirty, | 1069 | .mark_dirty = ext4_mark_dquot_dirty, |
1070 | .write_info = ext4_write_info, | 1070 | .write_info = ext4_write_info, |
1071 | .alloc_dquot = dquot_alloc, | 1071 | .alloc_dquot = dquot_alloc, |
1072 | .destroy_dquot = dquot_destroy, | 1072 | .destroy_dquot = dquot_destroy, |
1073 | }; | 1073 | }; |
1074 | 1074 | ||
1075 | static const struct quotactl_ops ext4_qctl_operations = { | 1075 | static const struct quotactl_ops ext4_qctl_operations = { |
1076 | .quota_on = ext4_quota_on, | 1076 | .quota_on = ext4_quota_on, |
1077 | .quota_off = ext4_quota_off, | 1077 | .quota_off = ext4_quota_off, |
1078 | .quota_sync = dquot_quota_sync, | 1078 | .quota_sync = dquot_quota_sync, |
1079 | .get_info = dquot_get_dqinfo, | 1079 | .get_info = dquot_get_dqinfo, |
1080 | .set_info = dquot_set_dqinfo, | 1080 | .set_info = dquot_set_dqinfo, |
1081 | .get_dqblk = dquot_get_dqblk, | 1081 | .get_dqblk = dquot_get_dqblk, |
1082 | .set_dqblk = dquot_set_dqblk | 1082 | .set_dqblk = dquot_set_dqblk |
1083 | }; | 1083 | }; |
1084 | #endif | 1084 | #endif |
1085 | 1085 | ||
1086 | static const struct super_operations ext4_sops = { | 1086 | static const struct super_operations ext4_sops = { |
1087 | .alloc_inode = ext4_alloc_inode, | 1087 | .alloc_inode = ext4_alloc_inode, |
1088 | .destroy_inode = ext4_destroy_inode, | 1088 | .destroy_inode = ext4_destroy_inode, |
1089 | .write_inode = ext4_write_inode, | 1089 | .write_inode = ext4_write_inode, |
1090 | .dirty_inode = ext4_dirty_inode, | 1090 | .dirty_inode = ext4_dirty_inode, |
1091 | .drop_inode = ext4_drop_inode, | 1091 | .drop_inode = ext4_drop_inode, |
1092 | .evict_inode = ext4_evict_inode, | 1092 | .evict_inode = ext4_evict_inode, |
1093 | .put_super = ext4_put_super, | 1093 | .put_super = ext4_put_super, |
1094 | .sync_fs = ext4_sync_fs, | 1094 | .sync_fs = ext4_sync_fs, |
1095 | .freeze_fs = ext4_freeze, | 1095 | .freeze_fs = ext4_freeze, |
1096 | .unfreeze_fs = ext4_unfreeze, | 1096 | .unfreeze_fs = ext4_unfreeze, |
1097 | .statfs = ext4_statfs, | 1097 | .statfs = ext4_statfs, |
1098 | .remount_fs = ext4_remount, | 1098 | .remount_fs = ext4_remount, |
1099 | .show_options = ext4_show_options, | 1099 | .show_options = ext4_show_options, |
1100 | #ifdef CONFIG_QUOTA | 1100 | #ifdef CONFIG_QUOTA |
1101 | .quota_read = ext4_quota_read, | 1101 | .quota_read = ext4_quota_read, |
1102 | .quota_write = ext4_quota_write, | 1102 | .quota_write = ext4_quota_write, |
1103 | .get_dquots = ext4_get_dquots, | 1103 | .get_dquots = ext4_get_dquots, |
1104 | #endif | 1104 | #endif |
1105 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1105 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1106 | }; | 1106 | }; |
1107 | 1107 | ||
1108 | static const struct export_operations ext4_export_ops = { | 1108 | static const struct export_operations ext4_export_ops = { |
1109 | .fh_to_dentry = ext4_fh_to_dentry, | 1109 | .fh_to_dentry = ext4_fh_to_dentry, |
1110 | .fh_to_parent = ext4_fh_to_parent, | 1110 | .fh_to_parent = ext4_fh_to_parent, |
1111 | .get_parent = ext4_get_parent, | 1111 | .get_parent = ext4_get_parent, |
1112 | }; | 1112 | }; |
1113 | 1113 | ||
1114 | enum { | 1114 | enum { |
1115 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 1115 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
1116 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 1116 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
1117 | Opt_nouid32, Opt_debug, Opt_removed, | 1117 | Opt_nouid32, Opt_debug, Opt_removed, |
1118 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 1118 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
1119 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, | 1119 | Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, |
1120 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, | 1120 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, |
1121 | Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, | 1121 | Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, |
1122 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1122 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1123 | Opt_data_err_abort, Opt_data_err_ignore, | 1123 | Opt_data_err_abort, Opt_data_err_ignore, |
1124 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1124 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1125 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1125 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1126 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, | 1126 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, |
1127 | Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, | 1127 | Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, |
1128 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, | 1128 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, |
1129 | Opt_lazytime, Opt_nolazytime, | 1129 | Opt_lazytime, Opt_nolazytime, |
1130 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, | 1130 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, |
1131 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1131 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1132 | Opt_dioread_nolock, Opt_dioread_lock, | 1132 | Opt_dioread_nolock, Opt_dioread_lock, |
1133 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, | 1133 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, |
1134 | Opt_max_dir_size_kb, Opt_nojournal_checksum, | 1134 | Opt_max_dir_size_kb, Opt_nojournal_checksum, |
1135 | }; | 1135 | }; |
1136 | 1136 | ||
1137 | static const match_table_t tokens = { | 1137 | static const match_table_t tokens = { |
1138 | {Opt_bsd_df, "bsddf"}, | 1138 | {Opt_bsd_df, "bsddf"}, |
1139 | {Opt_minix_df, "minixdf"}, | 1139 | {Opt_minix_df, "minixdf"}, |
1140 | {Opt_grpid, "grpid"}, | 1140 | {Opt_grpid, "grpid"}, |
1141 | {Opt_grpid, "bsdgroups"}, | 1141 | {Opt_grpid, "bsdgroups"}, |
1142 | {Opt_nogrpid, "nogrpid"}, | 1142 | {Opt_nogrpid, "nogrpid"}, |
1143 | {Opt_nogrpid, "sysvgroups"}, | 1143 | {Opt_nogrpid, "sysvgroups"}, |
1144 | {Opt_resgid, "resgid=%u"}, | 1144 | {Opt_resgid, "resgid=%u"}, |
1145 | {Opt_resuid, "resuid=%u"}, | 1145 | {Opt_resuid, "resuid=%u"}, |
1146 | {Opt_sb, "sb=%u"}, | 1146 | {Opt_sb, "sb=%u"}, |
1147 | {Opt_err_cont, "errors=continue"}, | 1147 | {Opt_err_cont, "errors=continue"}, |
1148 | {Opt_err_panic, "errors=panic"}, | 1148 | {Opt_err_panic, "errors=panic"}, |
1149 | {Opt_err_ro, "errors=remount-ro"}, | 1149 | {Opt_err_ro, "errors=remount-ro"}, |
1150 | {Opt_nouid32, "nouid32"}, | 1150 | {Opt_nouid32, "nouid32"}, |
1151 | {Opt_debug, "debug"}, | 1151 | {Opt_debug, "debug"}, |
1152 | {Opt_removed, "oldalloc"}, | 1152 | {Opt_removed, "oldalloc"}, |
1153 | {Opt_removed, "orlov"}, | 1153 | {Opt_removed, "orlov"}, |
1154 | {Opt_user_xattr, "user_xattr"}, | 1154 | {Opt_user_xattr, "user_xattr"}, |
1155 | {Opt_nouser_xattr, "nouser_xattr"}, | 1155 | {Opt_nouser_xattr, "nouser_xattr"}, |
1156 | {Opt_acl, "acl"}, | 1156 | {Opt_acl, "acl"}, |
1157 | {Opt_noacl, "noacl"}, | 1157 | {Opt_noacl, "noacl"}, |
1158 | {Opt_noload, "norecovery"}, | 1158 | {Opt_noload, "norecovery"}, |
1159 | {Opt_noload, "noload"}, | 1159 | {Opt_noload, "noload"}, |
1160 | {Opt_removed, "nobh"}, | 1160 | {Opt_removed, "nobh"}, |
1161 | {Opt_removed, "bh"}, | 1161 | {Opt_removed, "bh"}, |
1162 | {Opt_commit, "commit=%u"}, | 1162 | {Opt_commit, "commit=%u"}, |
1163 | {Opt_min_batch_time, "min_batch_time=%u"}, | 1163 | {Opt_min_batch_time, "min_batch_time=%u"}, |
1164 | {Opt_max_batch_time, "max_batch_time=%u"}, | 1164 | {Opt_max_batch_time, "max_batch_time=%u"}, |
1165 | {Opt_journal_dev, "journal_dev=%u"}, | 1165 | {Opt_journal_dev, "journal_dev=%u"}, |
1166 | {Opt_journal_path, "journal_path=%s"}, | 1166 | {Opt_journal_path, "journal_path=%s"}, |
1167 | {Opt_journal_checksum, "journal_checksum"}, | 1167 | {Opt_journal_checksum, "journal_checksum"}, |
1168 | {Opt_nojournal_checksum, "nojournal_checksum"}, | 1168 | {Opt_nojournal_checksum, "nojournal_checksum"}, |
1169 | {Opt_journal_async_commit, "journal_async_commit"}, | 1169 | {Opt_journal_async_commit, "journal_async_commit"}, |
1170 | {Opt_abort, "abort"}, | 1170 | {Opt_abort, "abort"}, |
1171 | {Opt_data_journal, "data=journal"}, | 1171 | {Opt_data_journal, "data=journal"}, |
1172 | {Opt_data_ordered, "data=ordered"}, | 1172 | {Opt_data_ordered, "data=ordered"}, |
1173 | {Opt_data_writeback, "data=writeback"}, | 1173 | {Opt_data_writeback, "data=writeback"}, |
1174 | {Opt_data_err_abort, "data_err=abort"}, | 1174 | {Opt_data_err_abort, "data_err=abort"}, |
1175 | {Opt_data_err_ignore, "data_err=ignore"}, | 1175 | {Opt_data_err_ignore, "data_err=ignore"}, |
1176 | {Opt_offusrjquota, "usrjquota="}, | 1176 | {Opt_offusrjquota, "usrjquota="}, |
1177 | {Opt_usrjquota, "usrjquota=%s"}, | 1177 | {Opt_usrjquota, "usrjquota=%s"}, |
1178 | {Opt_offgrpjquota, "grpjquota="}, | 1178 | {Opt_offgrpjquota, "grpjquota="}, |
1179 | {Opt_grpjquota, "grpjquota=%s"}, | 1179 | {Opt_grpjquota, "grpjquota=%s"}, |
1180 | {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, | 1180 | {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, |
1181 | {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, | 1181 | {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, |
1182 | {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, | 1182 | {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, |
1183 | {Opt_grpquota, "grpquota"}, | 1183 | {Opt_grpquota, "grpquota"}, |
1184 | {Opt_noquota, "noquota"}, | 1184 | {Opt_noquota, "noquota"}, |
1185 | {Opt_quota, "quota"}, | 1185 | {Opt_quota, "quota"}, |
1186 | {Opt_usrquota, "usrquota"}, | 1186 | {Opt_usrquota, "usrquota"}, |
1187 | {Opt_barrier, "barrier=%u"}, | 1187 | {Opt_barrier, "barrier=%u"}, |
1188 | {Opt_barrier, "barrier"}, | 1188 | {Opt_barrier, "barrier"}, |
1189 | {Opt_nobarrier, "nobarrier"}, | 1189 | {Opt_nobarrier, "nobarrier"}, |
1190 | {Opt_i_version, "i_version"}, | 1190 | {Opt_i_version, "i_version"}, |
1191 | {Opt_dax, "dax"}, | 1191 | {Opt_dax, "dax"}, |
1192 | {Opt_stripe, "stripe=%u"}, | 1192 | {Opt_stripe, "stripe=%u"}, |
1193 | {Opt_delalloc, "delalloc"}, | 1193 | {Opt_delalloc, "delalloc"}, |
1194 | {Opt_lazytime, "lazytime"}, | 1194 | {Opt_lazytime, "lazytime"}, |
1195 | {Opt_nolazytime, "nolazytime"}, | 1195 | {Opt_nolazytime, "nolazytime"}, |
1196 | {Opt_nodelalloc, "nodelalloc"}, | 1196 | {Opt_nodelalloc, "nodelalloc"}, |
1197 | {Opt_removed, "mblk_io_submit"}, | 1197 | {Opt_removed, "mblk_io_submit"}, |
1198 | {Opt_removed, "nomblk_io_submit"}, | 1198 | {Opt_removed, "nomblk_io_submit"}, |
1199 | {Opt_block_validity, "block_validity"}, | 1199 | {Opt_block_validity, "block_validity"}, |
1200 | {Opt_noblock_validity, "noblock_validity"}, | 1200 | {Opt_noblock_validity, "noblock_validity"}, |
1201 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | 1201 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, |
1202 | {Opt_journal_ioprio, "journal_ioprio=%u"}, | 1202 | {Opt_journal_ioprio, "journal_ioprio=%u"}, |
1203 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, | 1203 | {Opt_auto_da_alloc, "auto_da_alloc=%u"}, |
1204 | {Opt_auto_da_alloc, "auto_da_alloc"}, | 1204 | {Opt_auto_da_alloc, "auto_da_alloc"}, |
1205 | {Opt_noauto_da_alloc, "noauto_da_alloc"}, | 1205 | {Opt_noauto_da_alloc, "noauto_da_alloc"}, |
1206 | {Opt_dioread_nolock, "dioread_nolock"}, | 1206 | {Opt_dioread_nolock, "dioread_nolock"}, |
1207 | {Opt_dioread_lock, "dioread_lock"}, | 1207 | {Opt_dioread_lock, "dioread_lock"}, |
1208 | {Opt_discard, "discard"}, | 1208 | {Opt_discard, "discard"}, |
1209 | {Opt_nodiscard, "nodiscard"}, | 1209 | {Opt_nodiscard, "nodiscard"}, |
1210 | {Opt_init_itable, "init_itable=%u"}, | 1210 | {Opt_init_itable, "init_itable=%u"}, |
1211 | {Opt_init_itable, "init_itable"}, | 1211 | {Opt_init_itable, "init_itable"}, |
1212 | {Opt_noinit_itable, "noinit_itable"}, | 1212 | {Opt_noinit_itable, "noinit_itable"}, |
1213 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, | 1213 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, |
1214 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ | 1214 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ |
1215 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ | 1215 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ |
1216 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ | 1216 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ |
1217 | {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ | 1217 | {Opt_removed, "noreservation"}, /* mount option from ext2/3 */ |
1218 | {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ | 1218 | {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */ |
1219 | {Opt_err, NULL}, | 1219 | {Opt_err, NULL}, |
1220 | }; | 1220 | }; |
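Each entry above pairs a token with a pattern that match_token() applies to one comma-separated fragment of the option string; %u and %s positions become captured arguments, and table order matters (for example, "barrier=%u" is listed before bare "barrier" so the variant with an argument wins). Here is a rough, self-contained approximation of that matching, simplified and not the lib/parser.c implementation:

/*
 * Rough userspace approximation of match_token() against the table
 * above: options are split on commas, each fragment is matched
 * against a pattern, and "%u" captures become arguments.
 */
#include <stdio.h>
#include <string.h>

enum { Opt_err_ro, Opt_resuid, Opt_discard, Opt_unknown };

static const struct { int token; const char *pattern; } demo_tokens[] = {
	{ Opt_err_ro, "errors=remount-ro" },
	{ Opt_resuid, "resuid=%u" },
	{ Opt_discard, "discard" },
	{ Opt_unknown, NULL },
};

static int demo_match(const char *opt, unsigned int *arg)
{
	for (int i = 0; demo_tokens[i].pattern; i++) {
		const char *pat = demo_tokens[i].pattern;
		const char *pct = strstr(pat, "%u");

		if (!pct) {
			if (!strcmp(opt, pat))	/* literal pattern */
				return demo_tokens[i].token;
		} else if (!strncmp(opt, pat, pct - pat) &&
			   sscanf(opt + (pct - pat), "%u", arg) == 1) {
			return demo_tokens[i].token;
		}
	}
	return Opt_unknown;
}

int main(void)
{
	char opts[] = "errors=remount-ro,resuid=1000,discard";
	char *p, *rest = opts;
	unsigned int arg = 0;

	while ((p = strsep(&rest, ",")) != NULL) {  /* as parse_options() does */
		int token = demo_match(p, &arg);
		printf("token=%d arg=%u for \"%s\"\n", token, arg, p);
	}
	return 0;
}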
1221 | 1221 | ||
1222 | static ext4_fsblk_t get_sb_block(void **data) | 1222 | static ext4_fsblk_t get_sb_block(void **data) |
1223 | { | 1223 | { |
1224 | ext4_fsblk_t sb_block; | 1224 | ext4_fsblk_t sb_block; |
1225 | char *options = (char *) *data; | 1225 | char *options = (char *) *data; |
1226 | 1226 | ||
1227 | if (!options || strncmp(options, "sb=", 3) != 0) | 1227 | if (!options || strncmp(options, "sb=", 3) != 0) |
1228 | return 1; /* Default location */ | 1228 | return 1; /* Default location */ |
1229 | 1229 | ||
1230 | options += 3; | 1230 | options += 3; |
1231 | /* TODO: use simple_strtoll with >32bit ext4 */ | 1231 | /* TODO: use simple_strtoll with >32bit ext4 */ |
1232 | sb_block = simple_strtoul(options, &options, 0); | 1232 | sb_block = simple_strtoul(options, &options, 0); |
1233 | if (*options && *options != ',') { | 1233 | if (*options && *options != ',') { |
1234 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", | 1234 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
1235 | (char *) *data); | 1235 | (char *) *data); |
1236 | return 1; | 1236 | return 1; |
1237 | } | 1237 | } |
1238 | if (*options == ',') | 1238 | if (*options == ',') |
1239 | options++; | 1239 | options++; |
1240 | *data = (void *) options; | 1240 | *data = (void *) options; |
1241 | 1241 | ||
1242 | return sb_block; | 1242 | return sb_block; |
1243 | } | 1243 | } |
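get_sb_block() runs before general option parsing: it consumes a leading "sb=N," from the option string and hands the remainder back through *data, which is why handle_mount_opt() can simply return 1 for Opt_sb further down. A standalone sketch of that contract, using demo names and strtoull in place of the kernel helper:

/*
 * Userspace sketch of get_sb_block()'s contract: parse a leading
 * "sb=N" override, skip a trailing comma, and hand the remaining
 * options back to the caller. Returns 1, the default superblock
 * location, when no override is given or the value is malformed.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long long demo_get_sb_block(char **data)
{
	char *options = *data;
	unsigned long long sb_block;

	if (!options || strncmp(options, "sb=", 3) != 0)
		return 1;
	options += 3;
	sb_block = strtoull(options, &options, 0);
	if (*options && *options != ',')
		return 1;	/* malformed, fall back to the default */
	if (*options == ',')
		options++;
	*data = options;	/* hand the remaining options back */
	return sb_block;
}

int main(void)
{
	char opts[] = "sb=32768,errors=remount-ro";
	char *rest = opts;

	printf("sb block %llu, remaining \"%s\"\n",
	       demo_get_sb_block(&rest), rest);
	return 0;
}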
1244 | 1244 | ||
1245 | #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) | 1245 | #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) |
1246 | static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" | 1246 | static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" |
1247 | "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; | 1247 | "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; |
1248 | 1248 | ||
1249 | #ifdef CONFIG_QUOTA | 1249 | #ifdef CONFIG_QUOTA |
1250 | static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) | 1250 | static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) |
1251 | { | 1251 | { |
1252 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1252 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1253 | char *qname; | 1253 | char *qname; |
1254 | int ret = -1; | 1254 | int ret = -1; |
1255 | 1255 | ||
1256 | if (sb_any_quota_loaded(sb) && | 1256 | if (sb_any_quota_loaded(sb) && |
1257 | !sbi->s_qf_names[qtype]) { | 1257 | !sbi->s_qf_names[qtype]) { |
1258 | ext4_msg(sb, KERN_ERR, | 1258 | ext4_msg(sb, KERN_ERR, |
1259 | "Cannot change journaled " | 1259 | "Cannot change journaled " |
1260 | "quota options when quota turned on"); | 1260 | "quota options when quota turned on"); |
1261 | return -1; | 1261 | return -1; |
1262 | } | 1262 | } |
1263 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { | 1263 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { |
1264 | ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " | 1264 | ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " |
1265 | "when QUOTA feature is enabled"); | 1265 | "when QUOTA feature is enabled"); |
1266 | return -1; | 1266 | return -1; |
1267 | } | 1267 | } |
1268 | qname = match_strdup(args); | 1268 | qname = match_strdup(args); |
1269 | if (!qname) { | 1269 | if (!qname) { |
1270 | ext4_msg(sb, KERN_ERR, | 1270 | ext4_msg(sb, KERN_ERR, |
1271 | "Not enough memory for storing quotafile name"); | 1271 | "Not enough memory for storing quotafile name"); |
1272 | return -1; | 1272 | return -1; |
1273 | } | 1273 | } |
1274 | if (sbi->s_qf_names[qtype]) { | 1274 | if (sbi->s_qf_names[qtype]) { |
1275 | if (strcmp(sbi->s_qf_names[qtype], qname) == 0) | 1275 | if (strcmp(sbi->s_qf_names[qtype], qname) == 0) |
1276 | ret = 1; | 1276 | ret = 1; |
1277 | else | 1277 | else |
1278 | ext4_msg(sb, KERN_ERR, | 1278 | ext4_msg(sb, KERN_ERR, |
1279 | "%s quota file already specified", | 1279 | "%s quota file already specified", |
1280 | QTYPE2NAME(qtype)); | 1280 | QTYPE2NAME(qtype)); |
1281 | goto errout; | 1281 | goto errout; |
1282 | } | 1282 | } |
1283 | if (strchr(qname, '/')) { | 1283 | if (strchr(qname, '/')) { |
1284 | ext4_msg(sb, KERN_ERR, | 1284 | ext4_msg(sb, KERN_ERR, |
1285 | "quotafile must be on filesystem root"); | 1285 | "quotafile must be on filesystem root"); |
1286 | goto errout; | 1286 | goto errout; |
1287 | } | 1287 | } |
1288 | sbi->s_qf_names[qtype] = qname; | 1288 | sbi->s_qf_names[qtype] = qname; |
1289 | set_opt(sb, QUOTA); | 1289 | set_opt(sb, QUOTA); |
1290 | return 1; | 1290 | return 1; |
1291 | errout: | 1291 | errout: |
1292 | kfree(qname); | 1292 | kfree(qname); |
1293 | return ret; | 1293 | return ret; |
1294 | } | 1294 | } |
1295 | 1295 | ||
1296 | static int clear_qf_name(struct super_block *sb, int qtype) | 1296 | static int clear_qf_name(struct super_block *sb, int qtype) |
1297 | { | 1297 | { |
1298 | 1298 | ||
1299 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1299 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1300 | 1300 | ||
1301 | if (sb_any_quota_loaded(sb) && | 1301 | if (sb_any_quota_loaded(sb) && |
1302 | sbi->s_qf_names[qtype]) { | 1302 | sbi->s_qf_names[qtype]) { |
1303 | ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" | 1303 | ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" |
1304 | " when quota turned on"); | 1304 | " when quota turned on"); |
1305 | return -1; | 1305 | return -1; |
1306 | } | 1306 | } |
1307 | kfree(sbi->s_qf_names[qtype]); | 1307 | kfree(sbi->s_qf_names[qtype]); |
1308 | sbi->s_qf_names[qtype] = NULL; | 1308 | sbi->s_qf_names[qtype] = NULL; |
1309 | return 1; | 1309 | return 1; |
1310 | } | 1310 | } |
1311 | #endif | 1311 | #endif |
1312 | 1312 | ||
1313 | #define MOPT_SET 0x0001 | 1313 | #define MOPT_SET 0x0001 |
1314 | #define MOPT_CLEAR 0x0002 | 1314 | #define MOPT_CLEAR 0x0002 |
1315 | #define MOPT_NOSUPPORT 0x0004 | 1315 | #define MOPT_NOSUPPORT 0x0004 |
1316 | #define MOPT_EXPLICIT 0x0008 | 1316 | #define MOPT_EXPLICIT 0x0008 |
1317 | #define MOPT_CLEAR_ERR 0x0010 | 1317 | #define MOPT_CLEAR_ERR 0x0010 |
1318 | #define MOPT_GTE0 0x0020 | 1318 | #define MOPT_GTE0 0x0020 |
1319 | #ifdef CONFIG_QUOTA | 1319 | #ifdef CONFIG_QUOTA |
1320 | #define MOPT_Q 0 | 1320 | #define MOPT_Q 0 |
1321 | #define MOPT_QFMT 0x0040 | 1321 | #define MOPT_QFMT 0x0040 |
1322 | #else | 1322 | #else |
1323 | #define MOPT_Q MOPT_NOSUPPORT | 1323 | #define MOPT_Q MOPT_NOSUPPORT |
1324 | #define MOPT_QFMT MOPT_NOSUPPORT | 1324 | #define MOPT_QFMT MOPT_NOSUPPORT |
1325 | #endif | 1325 | #endif |
1326 | #define MOPT_DATAJ 0x0080 | 1326 | #define MOPT_DATAJ 0x0080 |
1327 | #define MOPT_NO_EXT2 0x0100 | 1327 | #define MOPT_NO_EXT2 0x0100 |
1328 | #define MOPT_NO_EXT3 0x0200 | 1328 | #define MOPT_NO_EXT3 0x0200 |
1329 | #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) | 1329 | #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) |
1330 | #define MOPT_STRING 0x0400 | 1330 | #define MOPT_STRING 0x0400 |
1331 | 1331 | ||
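These bits make the table below self-describing: each entry's flags say where an option is valid and how it is applied, and because MOPT_Q and MOPT_QFMT collapse to MOPT_NOSUPPORT when CONFIG_QUOTA is off, every quota option is rejected by the generic "not supported" branch with no per-entry #ifdefs. A small compile-time illustration with stand-in values mirroring the defines above:

/*
 * Illustration of how the MOPT_* bits compose; DEMO_* values are
 * stand-ins, not the kernel's definitions.
 */
#include <assert.h>
#include <stdio.h>

#define DEMO_SET	0x0001
#define DEMO_NOSUPPORT	0x0004
#define DEMO_NO_EXT2	0x0100
#define DEMO_NO_EXT3	0x0200
#define DEMO_EXT4_ONLY	(DEMO_NO_EXT2 | DEMO_NO_EXT3)

int main(void)
{
	int flags = DEMO_EXT4_ONLY | DEMO_SET;	/* e.g. dioread_nolock */

	assert(flags & DEMO_NO_EXT2);	/* rejected on ext2 mounts */
	assert(flags & DEMO_NO_EXT3);	/* rejected on ext3 mounts */
	assert(!(flags & DEMO_NOSUPPORT));
	printf("flags=0x%04x\n", flags);
	return 0;
}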
1332 | static const struct mount_opts { | 1332 | static const struct mount_opts { |
1333 | int token; | 1333 | int token; |
1334 | int mount_opt; | 1334 | int mount_opt; |
1335 | int flags; | 1335 | int flags; |
1336 | } ext4_mount_opts[] = { | 1336 | } ext4_mount_opts[] = { |
1337 | {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, | 1337 | {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, |
1338 | {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, | 1338 | {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, |
1339 | {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, | 1339 | {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, |
1340 | {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, | 1340 | {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, |
1341 | {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, | 1341 | {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, |
1342 | {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, | 1342 | {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, |
1343 | {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, | 1343 | {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, |
1344 | MOPT_EXT4_ONLY | MOPT_SET}, | 1344 | MOPT_EXT4_ONLY | MOPT_SET}, |
1345 | {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, | 1345 | {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, |
1346 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | 1346 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1347 | {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, | 1347 | {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, |
1348 | {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, | 1348 | {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, |
1349 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, | 1349 | {Opt_delalloc, EXT4_MOUNT_DELALLOC, |
1350 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, | 1350 | MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, |
1351 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, | 1351 | {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, |
1352 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | 1352 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1353 | {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | 1353 | {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, |
1354 | MOPT_EXT4_ONLY | MOPT_CLEAR}, | 1354 | MOPT_EXT4_ONLY | MOPT_CLEAR}, |
1355 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, | 1355 | {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, |
1356 | MOPT_EXT4_ONLY | MOPT_SET}, | 1356 | MOPT_EXT4_ONLY | MOPT_SET}, |
1357 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | | 1357 | {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | |
1358 | EXT4_MOUNT_JOURNAL_CHECKSUM), | 1358 | EXT4_MOUNT_JOURNAL_CHECKSUM), |
1359 | MOPT_EXT4_ONLY | MOPT_SET}, | 1359 | MOPT_EXT4_ONLY | MOPT_SET}, |
1360 | {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, | 1360 | {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, |
1361 | {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, | 1361 | {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, |
1362 | {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, | 1362 | {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, |
1363 | {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, | 1363 | {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, |
1364 | {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, | 1364 | {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, |
1365 | MOPT_NO_EXT2 | MOPT_SET}, | 1365 | MOPT_NO_EXT2 | MOPT_SET}, |
1366 | {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, | 1366 | {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, |
1367 | MOPT_NO_EXT2 | MOPT_CLEAR}, | 1367 | MOPT_NO_EXT2 | MOPT_CLEAR}, |
1368 | {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, | 1368 | {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, |
1369 | {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, | 1369 | {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, |
1370 | {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, | 1370 | {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, |
1371 | {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, | 1371 | {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, |
1372 | {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, | 1372 | {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, |
1373 | {Opt_commit, 0, MOPT_GTE0}, | 1373 | {Opt_commit, 0, MOPT_GTE0}, |
1374 | {Opt_max_batch_time, 0, MOPT_GTE0}, | 1374 | {Opt_max_batch_time, 0, MOPT_GTE0}, |
1375 | {Opt_min_batch_time, 0, MOPT_GTE0}, | 1375 | {Opt_min_batch_time, 0, MOPT_GTE0}, |
1376 | {Opt_inode_readahead_blks, 0, MOPT_GTE0}, | 1376 | {Opt_inode_readahead_blks, 0, MOPT_GTE0}, |
1377 | {Opt_init_itable, 0, MOPT_GTE0}, | 1377 | {Opt_init_itable, 0, MOPT_GTE0}, |
1378 | {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET}, | 1378 | {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET}, |
1379 | {Opt_stripe, 0, MOPT_GTE0}, | 1379 | {Opt_stripe, 0, MOPT_GTE0}, |
1380 | {Opt_resuid, 0, MOPT_GTE0}, | 1380 | {Opt_resuid, 0, MOPT_GTE0}, |
1381 | {Opt_resgid, 0, MOPT_GTE0}, | 1381 | {Opt_resgid, 0, MOPT_GTE0}, |
1382 | {Opt_journal_dev, 0, MOPT_GTE0}, | 1382 | {Opt_journal_dev, 0, MOPT_GTE0}, |
1383 | {Opt_journal_path, 0, MOPT_STRING}, | 1383 | {Opt_journal_path, 0, MOPT_STRING}, |
1384 | {Opt_journal_ioprio, 0, MOPT_GTE0}, | 1384 | {Opt_journal_ioprio, 0, MOPT_GTE0}, |
1385 | {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, | 1385 | {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, |
1386 | {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, | 1386 | {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, |
1387 | {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, | 1387 | {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, |
1388 | MOPT_NO_EXT2 | MOPT_DATAJ}, | 1388 | MOPT_NO_EXT2 | MOPT_DATAJ}, |
1389 | {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, | 1389 | {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, |
1390 | {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, | 1390 | {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, |
1391 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1391 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1392 | {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, | 1392 | {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, |
1393 | {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, | 1393 | {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, |
1394 | #else | 1394 | #else |
1395 | {Opt_acl, 0, MOPT_NOSUPPORT}, | 1395 | {Opt_acl, 0, MOPT_NOSUPPORT}, |
1396 | {Opt_noacl, 0, MOPT_NOSUPPORT}, | 1396 | {Opt_noacl, 0, MOPT_NOSUPPORT}, |
1397 | #endif | 1397 | #endif |
1398 | {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, | 1398 | {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, |
1399 | {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, | 1399 | {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, |
1400 | {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, | 1400 | {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, |
1401 | {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, | 1401 | {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, |
1402 | MOPT_SET | MOPT_Q}, | 1402 | MOPT_SET | MOPT_Q}, |
1403 | {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, | 1403 | {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, |
1404 | MOPT_SET | MOPT_Q}, | 1404 | MOPT_SET | MOPT_Q}, |
1405 | {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | | 1405 | {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | |
1406 | EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, | 1406 | EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, |
1407 | {Opt_usrjquota, 0, MOPT_Q}, | 1407 | {Opt_usrjquota, 0, MOPT_Q}, |
1408 | {Opt_grpjquota, 0, MOPT_Q}, | 1408 | {Opt_grpjquota, 0, MOPT_Q}, |
1409 | {Opt_offusrjquota, 0, MOPT_Q}, | 1409 | {Opt_offusrjquota, 0, MOPT_Q}, |
1410 | {Opt_offgrpjquota, 0, MOPT_Q}, | 1410 | {Opt_offgrpjquota, 0, MOPT_Q}, |
1411 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, | 1411 | {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, |
1412 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, | 1412 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, |
1413 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, | 1413 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, |
1414 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, | 1414 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, |
1415 | {Opt_err, 0, 0} | 1415 | {Opt_err, 0, 0} |
1416 | }; | 1416 | }; |
1417 | 1417 | ||
1418 | static int handle_mount_opt(struct super_block *sb, char *opt, int token, | 1418 | static int handle_mount_opt(struct super_block *sb, char *opt, int token, |
1419 | substring_t *args, unsigned long *journal_devnum, | 1419 | substring_t *args, unsigned long *journal_devnum, |
1420 | unsigned int *journal_ioprio, int is_remount) | 1420 | unsigned int *journal_ioprio, int is_remount) |
1421 | { | 1421 | { |
1422 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1422 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1423 | const struct mount_opts *m; | 1423 | const struct mount_opts *m; |
1424 | kuid_t uid; | 1424 | kuid_t uid; |
1425 | kgid_t gid; | 1425 | kgid_t gid; |
1426 | int arg = 0; | 1426 | int arg = 0; |
1427 | 1427 | ||
1428 | #ifdef CONFIG_QUOTA | 1428 | #ifdef CONFIG_QUOTA |
1429 | if (token == Opt_usrjquota) | 1429 | if (token == Opt_usrjquota) |
1430 | return set_qf_name(sb, USRQUOTA, &args[0]); | 1430 | return set_qf_name(sb, USRQUOTA, &args[0]); |
1431 | else if (token == Opt_grpjquota) | 1431 | else if (token == Opt_grpjquota) |
1432 | return set_qf_name(sb, GRPQUOTA, &args[0]); | 1432 | return set_qf_name(sb, GRPQUOTA, &args[0]); |
1433 | else if (token == Opt_offusrjquota) | 1433 | else if (token == Opt_offusrjquota) |
1434 | return clear_qf_name(sb, USRQUOTA); | 1434 | return clear_qf_name(sb, USRQUOTA); |
1435 | else if (token == Opt_offgrpjquota) | 1435 | else if (token == Opt_offgrpjquota) |
1436 | return clear_qf_name(sb, GRPQUOTA); | 1436 | return clear_qf_name(sb, GRPQUOTA); |
1437 | #endif | 1437 | #endif |
1438 | switch (token) { | 1438 | switch (token) { |
1439 | case Opt_noacl: | 1439 | case Opt_noacl: |
1440 | case Opt_nouser_xattr: | 1440 | case Opt_nouser_xattr: |
1441 | ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); | 1441 | ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); |
1442 | break; | 1442 | break; |
1443 | case Opt_sb: | 1443 | case Opt_sb: |
1444 | return 1; /* handled by get_sb_block() */ | 1444 | return 1; /* handled by get_sb_block() */ |
1445 | case Opt_removed: | 1445 | case Opt_removed: |
1446 | ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); | 1446 | ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); |
1447 | return 1; | 1447 | return 1; |
1448 | case Opt_abort: | 1448 | case Opt_abort: |
1449 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; | 1449 | sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; |
1450 | return 1; | 1450 | return 1; |
1451 | case Opt_i_version: | 1451 | case Opt_i_version: |
1452 | sb->s_flags |= MS_I_VERSION; | 1452 | sb->s_flags |= MS_I_VERSION; |
1453 | return 1; | 1453 | return 1; |
1454 | case Opt_lazytime: | 1454 | case Opt_lazytime: |
1455 | sb->s_flags |= MS_LAZYTIME; | 1455 | sb->s_flags |= MS_LAZYTIME; |
1456 | return 1; | 1456 | return 1; |
1457 | case Opt_nolazytime: | 1457 | case Opt_nolazytime: |
1458 | sb->s_flags &= ~MS_LAZYTIME; | 1458 | sb->s_flags &= ~MS_LAZYTIME; |
1459 | return 1; | 1459 | return 1; |
1460 | } | 1460 | } |
1461 | 1461 | ||
1462 | for (m = ext4_mount_opts; m->token != Opt_err; m++) | 1462 | for (m = ext4_mount_opts; m->token != Opt_err; m++) |
1463 | if (token == m->token) | 1463 | if (token == m->token) |
1464 | break; | 1464 | break; |
1465 | 1465 | ||
1466 | if (m->token == Opt_err) { | 1466 | if (m->token == Opt_err) { |
1467 | ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " | 1467 | ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " |
1468 | "or missing value", opt); | 1468 | "or missing value", opt); |
1469 | return -1; | 1469 | return -1; |
1470 | } | 1470 | } |
1471 | 1471 | ||
1472 | if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { | 1472 | if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { |
1473 | ext4_msg(sb, KERN_ERR, | 1473 | ext4_msg(sb, KERN_ERR, |
1474 | "Mount option \"%s\" incompatible with ext2", opt); | 1474 | "Mount option \"%s\" incompatible with ext2", opt); |
1475 | return -1; | 1475 | return -1; |
1476 | } | 1476 | } |
1477 | if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { | 1477 | if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { |
1478 | ext4_msg(sb, KERN_ERR, | 1478 | ext4_msg(sb, KERN_ERR, |
1479 | "Mount option \"%s\" incompatible with ext3", opt); | 1479 | "Mount option \"%s\" incompatible with ext3", opt); |
1480 | return -1; | 1480 | return -1; |
1481 | } | 1481 | } |
1482 | 1482 | ||
1483 | if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) | 1483 | if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) |
1484 | return -1; | 1484 | return -1; |
1485 | if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) | 1485 | if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) |
1486 | return -1; | 1486 | return -1; |
1487 | if (m->flags & MOPT_EXPLICIT) | 1487 | if (m->flags & MOPT_EXPLICIT) |
1488 | set_opt2(sb, EXPLICIT_DELALLOC); | 1488 | set_opt2(sb, EXPLICIT_DELALLOC); |
1489 | if (m->flags & MOPT_CLEAR_ERR) | 1489 | if (m->flags & MOPT_CLEAR_ERR) |
1490 | clear_opt(sb, ERRORS_MASK); | 1490 | clear_opt(sb, ERRORS_MASK); |
1491 | if (token == Opt_noquota && sb_any_quota_loaded(sb)) { | 1491 | if (token == Opt_noquota && sb_any_quota_loaded(sb)) { |
1492 | ext4_msg(sb, KERN_ERR, "Cannot change quota " | 1492 | ext4_msg(sb, KERN_ERR, "Cannot change quota " |
1493 | "options when quota turned on"); | 1493 | "options when quota turned on"); |
1494 | return -1; | 1494 | return -1; |
1495 | } | 1495 | } |
1496 | 1496 | ||
1497 | if (m->flags & MOPT_NOSUPPORT) { | 1497 | if (m->flags & MOPT_NOSUPPORT) { |
1498 | ext4_msg(sb, KERN_ERR, "%s option not supported", opt); | 1498 | ext4_msg(sb, KERN_ERR, "%s option not supported", opt); |
1499 | } else if (token == Opt_commit) { | 1499 | } else if (token == Opt_commit) { |
1500 | if (arg == 0) | 1500 | if (arg == 0) |
1501 | arg = JBD2_DEFAULT_MAX_COMMIT_AGE; | 1501 | arg = JBD2_DEFAULT_MAX_COMMIT_AGE; |
1502 | sbi->s_commit_interval = HZ * arg; | 1502 | sbi->s_commit_interval = HZ * arg; |
1503 | } else if (token == Opt_max_batch_time) { | 1503 | } else if (token == Opt_max_batch_time) { |
1504 | sbi->s_max_batch_time = arg; | 1504 | sbi->s_max_batch_time = arg; |
1505 | } else if (token == Opt_min_batch_time) { | 1505 | } else if (token == Opt_min_batch_time) { |
1506 | sbi->s_min_batch_time = arg; | 1506 | sbi->s_min_batch_time = arg; |
1507 | } else if (token == Opt_inode_readahead_blks) { | 1507 | } else if (token == Opt_inode_readahead_blks) { |
1508 | if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) { | 1508 | if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) { |
1509 | ext4_msg(sb, KERN_ERR, | 1509 | ext4_msg(sb, KERN_ERR, |
1510 | "EXT4-fs: inode_readahead_blks must be " | 1510 | "EXT4-fs: inode_readahead_blks must be " |
1511 | "0 or a power of 2 smaller than 2^31"); | 1511 | "0 or a power of 2 smaller than 2^31"); |
1512 | return -1; | 1512 | return -1; |
1513 | } | 1513 | } |
1514 | sbi->s_inode_readahead_blks = arg; | 1514 | sbi->s_inode_readahead_blks = arg; |
1515 | } else if (token == Opt_init_itable) { | 1515 | } else if (token == Opt_init_itable) { |
1516 | set_opt(sb, INIT_INODE_TABLE); | 1516 | set_opt(sb, INIT_INODE_TABLE); |
1517 | if (!args->from) | 1517 | if (!args->from) |
1518 | arg = EXT4_DEF_LI_WAIT_MULT; | 1518 | arg = EXT4_DEF_LI_WAIT_MULT; |
1519 | sbi->s_li_wait_mult = arg; | 1519 | sbi->s_li_wait_mult = arg; |
1520 | } else if (token == Opt_max_dir_size_kb) { | 1520 | } else if (token == Opt_max_dir_size_kb) { |
1521 | sbi->s_max_dir_size_kb = arg; | 1521 | sbi->s_max_dir_size_kb = arg; |
1522 | } else if (token == Opt_stripe) { | 1522 | } else if (token == Opt_stripe) { |
1523 | sbi->s_stripe = arg; | 1523 | sbi->s_stripe = arg; |
1524 | } else if (token == Opt_resuid) { | 1524 | } else if (token == Opt_resuid) { |
1525 | uid = make_kuid(current_user_ns(), arg); | 1525 | uid = make_kuid(current_user_ns(), arg); |
1526 | if (!uid_valid(uid)) { | 1526 | if (!uid_valid(uid)) { |
1527 | ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); | 1527 | ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); |
1528 | return -1; | 1528 | return -1; |
1529 | } | 1529 | } |
1530 | sbi->s_resuid = uid; | 1530 | sbi->s_resuid = uid; |
1531 | } else if (token == Opt_resgid) { | 1531 | } else if (token == Opt_resgid) { |
1532 | gid = make_kgid(current_user_ns(), arg); | 1532 | gid = make_kgid(current_user_ns(), arg); |
1533 | if (!gid_valid(gid)) { | 1533 | if (!gid_valid(gid)) { |
1534 | ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); | 1534 | ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); |
1535 | return -1; | 1535 | return -1; |
1536 | } | 1536 | } |
1537 | sbi->s_resgid = gid; | 1537 | sbi->s_resgid = gid; |
1538 | } else if (token == Opt_journal_dev) { | 1538 | } else if (token == Opt_journal_dev) { |
1539 | if (is_remount) { | 1539 | if (is_remount) { |
1540 | ext4_msg(sb, KERN_ERR, | 1540 | ext4_msg(sb, KERN_ERR, |
1541 | "Cannot specify journal on remount"); | 1541 | "Cannot specify journal on remount"); |
1542 | return -1; | 1542 | return -1; |
1543 | } | 1543 | } |
1544 | *journal_devnum = arg; | 1544 | *journal_devnum = arg; |
1545 | } else if (token == Opt_journal_path) { | 1545 | } else if (token == Opt_journal_path) { |
1546 | char *journal_path; | 1546 | char *journal_path; |
1547 | struct inode *journal_inode; | 1547 | struct inode *journal_inode; |
1548 | struct path path; | 1548 | struct path path; |
1549 | int error; | 1549 | int error; |
1550 | 1550 | ||
1551 | if (is_remount) { | 1551 | if (is_remount) { |
1552 | ext4_msg(sb, KERN_ERR, | 1552 | ext4_msg(sb, KERN_ERR, |
1553 | "Cannot specify journal on remount"); | 1553 | "Cannot specify journal on remount"); |
1554 | return -1; | 1554 | return -1; |
1555 | } | 1555 | } |
1556 | journal_path = match_strdup(&args[0]); | 1556 | journal_path = match_strdup(&args[0]); |
1557 | if (!journal_path) { | 1557 | if (!journal_path) { |
1558 | ext4_msg(sb, KERN_ERR, "error: could not dup " | 1558 | ext4_msg(sb, KERN_ERR, "error: could not dup " |
1559 | "journal device string"); | 1559 | "journal device string"); |
1560 | return -1; | 1560 | return -1; |
1561 | } | 1561 | } |
1562 | 1562 | ||
1563 | error = kern_path(journal_path, LOOKUP_FOLLOW, &path); | 1563 | error = kern_path(journal_path, LOOKUP_FOLLOW, &path); |
1564 | if (error) { | 1564 | if (error) { |
1565 | ext4_msg(sb, KERN_ERR, "error: could not find " | 1565 | ext4_msg(sb, KERN_ERR, "error: could not find " |
1566 | "journal device path: error %d", error); | 1566 | "journal device path: error %d", error); |
1567 | kfree(journal_path); | 1567 | kfree(journal_path); |
1568 | return -1; | 1568 | return -1; |
1569 | } | 1569 | } |
1570 | 1570 | ||
1571 | journal_inode = path.dentry->d_inode; | 1571 | journal_inode = path.dentry->d_inode; |
1572 | if (!S_ISBLK(journal_inode->i_mode)) { | 1572 | if (!S_ISBLK(journal_inode->i_mode)) { |
1573 | ext4_msg(sb, KERN_ERR, "error: journal path %s " | 1573 | ext4_msg(sb, KERN_ERR, "error: journal path %s " |
1574 | "is not a block device", journal_path); | 1574 | "is not a block device", journal_path); |
1575 | path_put(&path); | 1575 | path_put(&path); |
1576 | kfree(journal_path); | 1576 | kfree(journal_path); |
1577 | return -1; | 1577 | return -1; |
1578 | } | 1578 | } |
1579 | 1579 | ||
1580 | *journal_devnum = new_encode_dev(journal_inode->i_rdev); | 1580 | *journal_devnum = new_encode_dev(journal_inode->i_rdev); |
1581 | path_put(&path); | 1581 | path_put(&path); |
1582 | kfree(journal_path); | 1582 | kfree(journal_path); |
1583 | } else if (token == Opt_journal_ioprio) { | 1583 | } else if (token == Opt_journal_ioprio) { |
1584 | if (arg > 7) { | 1584 | if (arg > 7) { |
1585 | ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" | 1585 | ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" |
1586 | " (must be 0-7)"); | 1586 | " (must be 0-7)"); |
1587 | return -1; | 1587 | return -1; |
1588 | } | 1588 | } |
1589 | *journal_ioprio = | 1589 | *journal_ioprio = |
1590 | IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); | 1590 | IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); |
1591 | } else if (m->flags & MOPT_DATAJ) { | 1591 | } else if (m->flags & MOPT_DATAJ) { |
1592 | if (is_remount) { | 1592 | if (is_remount) { |
1593 | if (!sbi->s_journal) | 1593 | if (!sbi->s_journal) |
1594 | ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); | 1594 | ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); |
1595 | else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { | 1595 | else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { |
1596 | ext4_msg(sb, KERN_ERR, | 1596 | ext4_msg(sb, KERN_ERR, |
1597 | "Cannot change data mode on remount"); | 1597 | "Cannot change data mode on remount"); |
1598 | return -1; | 1598 | return -1; |
1599 | } | 1599 | } |
1600 | } else { | 1600 | } else { |
1601 | clear_opt(sb, DATA_FLAGS); | 1601 | clear_opt(sb, DATA_FLAGS); |
1602 | sbi->s_mount_opt |= m->mount_opt; | 1602 | sbi->s_mount_opt |= m->mount_opt; |
1603 | } | 1603 | } |
1604 | #ifdef CONFIG_QUOTA | 1604 | #ifdef CONFIG_QUOTA |
1605 | } else if (m->flags & MOPT_QFMT) { | 1605 | } else if (m->flags & MOPT_QFMT) { |
1606 | if (sb_any_quota_loaded(sb) && | 1606 | if (sb_any_quota_loaded(sb) && |
1607 | sbi->s_jquota_fmt != m->mount_opt) { | 1607 | sbi->s_jquota_fmt != m->mount_opt) { |
1608 | ext4_msg(sb, KERN_ERR, "Cannot change journaled " | 1608 | ext4_msg(sb, KERN_ERR, "Cannot change journaled " |
1609 | "quota options when quota turned on"); | 1609 | "quota options when quota turned on"); |
1610 | return -1; | 1610 | return -1; |
1611 | } | 1611 | } |
1612 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1612 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
1613 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { | 1613 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { |
1614 | ext4_msg(sb, KERN_ERR, | 1614 | ext4_msg(sb, KERN_ERR, |
1615 | "Cannot set journaled quota options " | 1615 | "Cannot set journaled quota options " |
1616 | "when QUOTA feature is enabled"); | 1616 | "when QUOTA feature is enabled"); |
1617 | return -1; | 1617 | return -1; |
1618 | } | 1618 | } |
1619 | sbi->s_jquota_fmt = m->mount_opt; | 1619 | sbi->s_jquota_fmt = m->mount_opt; |
1620 | #endif | 1620 | #endif |
1621 | #ifndef CONFIG_FS_DAX | 1621 | #ifndef CONFIG_FS_DAX |
1622 | } else if (token == Opt_dax) { | 1622 | } else if (token == Opt_dax) { |
1623 | ext4_msg(sb, KERN_INFO, "dax option not supported"); | 1623 | ext4_msg(sb, KERN_INFO, "dax option not supported"); |
1624 | return -1; | 1624 | return -1; |
1625 | #endif | 1625 | #endif |
1626 | } else { | 1626 | } else { |
1627 | if (!args->from) | 1627 | if (!args->from) |
1628 | arg = 1; | 1628 | arg = 1; |
1629 | if (m->flags & MOPT_CLEAR) | 1629 | if (m->flags & MOPT_CLEAR) |
1630 | arg = !arg; | 1630 | arg = !arg; |
1631 | else if (unlikely(!(m->flags & MOPT_SET))) { | 1631 | else if (unlikely(!(m->flags & MOPT_SET))) { |
1632 | ext4_msg(sb, KERN_WARNING, | 1632 | ext4_msg(sb, KERN_WARNING, |
1633 | "buggy handling of option %s", opt); | 1633 | "buggy handling of option %s", opt); |
1634 | WARN_ON(1); | 1634 | WARN_ON(1); |
1635 | return -1; | 1635 | return -1; |
1636 | } | 1636 | } |
1637 | if (arg != 0) | 1637 | if (arg != 0) |
1638 | sbi->s_mount_opt |= m->mount_opt; | 1638 | sbi->s_mount_opt |= m->mount_opt; |
1639 | else | 1639 | else |
1640 | sbi->s_mount_opt &= ~m->mount_opt; | 1640 | sbi->s_mount_opt &= ~m->mount_opt; |
1641 | } | 1641 | } |
1642 | return 1; | 1642 | return 1; |
1643 | } | 1643 | } |
1644 | 1644 | ||
1645 | static int parse_options(char *options, struct super_block *sb, | 1645 | static int parse_options(char *options, struct super_block *sb, |
1646 | unsigned long *journal_devnum, | 1646 | unsigned long *journal_devnum, |
1647 | unsigned int *journal_ioprio, | 1647 | unsigned int *journal_ioprio, |
1648 | int is_remount) | 1648 | int is_remount) |
1649 | { | 1649 | { |
1650 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1650 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1651 | char *p; | 1651 | char *p; |
1652 | substring_t args[MAX_OPT_ARGS]; | 1652 | substring_t args[MAX_OPT_ARGS]; |
1653 | int token; | 1653 | int token; |
1654 | 1654 | ||
1655 | if (!options) | 1655 | if (!options) |
1656 | return 1; | 1656 | return 1; |
1657 | 1657 | ||
1658 | while ((p = strsep(&options, ",")) != NULL) { | 1658 | while ((p = strsep(&options, ",")) != NULL) { |
1659 | if (!*p) | 1659 | if (!*p) |
1660 | continue; | 1660 | continue; |
1661 | /* | 1661 | /* |
1662 | * Initialize args struct so we know whether arg was | 1662 | * Initialize args struct so we know whether arg was |
1663 | * found; some options take optional arguments. | 1663 | * found; some options take optional arguments. |
1664 | */ | 1664 | */ |
1665 | args[0].to = args[0].from = NULL; | 1665 | args[0].to = args[0].from = NULL; |
1666 | token = match_token(p, tokens, args); | 1666 | token = match_token(p, tokens, args); |
1667 | if (handle_mount_opt(sb, p, token, args, journal_devnum, | 1667 | if (handle_mount_opt(sb, p, token, args, journal_devnum, |
1668 | journal_ioprio, is_remount) < 0) | 1668 | journal_ioprio, is_remount) < 0) |
1669 | return 0; | 1669 | return 0; |
1670 | } | 1670 | } |
1671 | #ifdef CONFIG_QUOTA | 1671 | #ifdef CONFIG_QUOTA |
1672 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && | 1672 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && |
1673 | (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { | 1673 | (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { |
1674 | ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " | 1674 | ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " |
1675 | "feature is enabled"); | 1675 | "feature is enabled"); |
1676 | return 0; | 1676 | return 0; |
1677 | } | 1677 | } |
1678 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { | 1678 | if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
1679 | if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) | 1679 | if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) |
1680 | clear_opt(sb, USRQUOTA); | 1680 | clear_opt(sb, USRQUOTA); |
1681 | 1681 | ||
1682 | if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) | 1682 | if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) |
1683 | clear_opt(sb, GRPQUOTA); | 1683 | clear_opt(sb, GRPQUOTA); |
1684 | 1684 | ||
1685 | if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { | 1685 | if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { |
1686 | ext4_msg(sb, KERN_ERR, "old and new quota " | 1686 | ext4_msg(sb, KERN_ERR, "old and new quota " |
1687 | "format mixing"); | 1687 | "format mixing"); |
1688 | return 0; | 1688 | return 0; |
1689 | } | 1689 | } |
1690 | 1690 | ||
1691 | if (!sbi->s_jquota_fmt) { | 1691 | if (!sbi->s_jquota_fmt) { |
1692 | ext4_msg(sb, KERN_ERR, "journaled quota format " | 1692 | ext4_msg(sb, KERN_ERR, "journaled quota format " |
1693 | "not specified"); | 1693 | "not specified"); |
1694 | return 0; | 1694 | return 0; |
1695 | } | 1695 | } |
1696 | } | 1696 | } |
1697 | #endif | 1697 | #endif |
1698 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 1698 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
1699 | int blocksize = | 1699 | int blocksize = |
1700 | BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); | 1700 | BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); |
1701 | 1701 | ||
1702 | if (blocksize < PAGE_CACHE_SIZE) { | 1702 | if (blocksize < PAGE_CACHE_SIZE) { |
1703 | ext4_msg(sb, KERN_ERR, "can't mount with " | 1703 | ext4_msg(sb, KERN_ERR, "can't mount with " |
1704 | "dioread_nolock if block size != PAGE_SIZE"); | 1704 | "dioread_nolock if block size != PAGE_SIZE"); |
1705 | return 0; | 1705 | return 0; |
1706 | } | 1706 | } |
1707 | } | 1707 | } |
1708 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && | 1708 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && |
1709 | test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 1709 | test_opt(sb, JOURNAL_ASYNC_COMMIT)) { |
1710 | ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit " | 1710 | ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit " |
1711 | "in data=ordered mode"); | 1711 | "in data=ordered mode"); |
1712 | return 0; | 1712 | return 0; |
1713 | } | 1713 | } |
1714 | return 1; | 1714 | return 1; |
1715 | } | 1715 | } |
1716 | 1716 | ||
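For readers following the control flow: the loop above is the classic strsep()-based tokenizer, destructively splitting the comma-separated option string, skipping empty tokens, and dispatching each one to a handler. A minimal user-space sketch of the same pattern, assuming a glibc/BSD environment (handle_opt() is a hypothetical stand-in for handle_mount_opt() and the match_token() table, not the kernel code):

#define _DEFAULT_SOURCE   /* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-in for handle_mount_opt(); returns < 0 on error. */
static int handle_opt(const char *opt)
{
        printf("option: %s\n", opt);
        return 1;
}

int main(void)
{
        char options[] = "data=ordered,,noatime,commit=5";
        char *rest = options, *p;

        /* strsep() splits destructively; empty tokens are skipped, just
         * like the "if (!*p) continue;" in parse_options() above. */
        while ((p = strsep(&rest, ",")) != NULL) {
                if (!*p)
                        continue;
                if (handle_opt(p) < 0)
                        return 1;
        }
        return 0;
}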
1717 | static inline void ext4_show_quota_options(struct seq_file *seq, | 1717 | static inline void ext4_show_quota_options(struct seq_file *seq, |
1718 | struct super_block *sb) | 1718 | struct super_block *sb) |
1719 | { | 1719 | { |
1720 | #if defined(CONFIG_QUOTA) | 1720 | #if defined(CONFIG_QUOTA) |
1721 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1721 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1722 | 1722 | ||
1723 | if (sbi->s_jquota_fmt) { | 1723 | if (sbi->s_jquota_fmt) { |
1724 | char *fmtname = ""; | 1724 | char *fmtname = ""; |
1725 | 1725 | ||
1726 | switch (sbi->s_jquota_fmt) { | 1726 | switch (sbi->s_jquota_fmt) { |
1727 | case QFMT_VFS_OLD: | 1727 | case QFMT_VFS_OLD: |
1728 | fmtname = "vfsold"; | 1728 | fmtname = "vfsold"; |
1729 | break; | 1729 | break; |
1730 | case QFMT_VFS_V0: | 1730 | case QFMT_VFS_V0: |
1731 | fmtname = "vfsv0"; | 1731 | fmtname = "vfsv0"; |
1732 | break; | 1732 | break; |
1733 | case QFMT_VFS_V1: | 1733 | case QFMT_VFS_V1: |
1734 | fmtname = "vfsv1"; | 1734 | fmtname = "vfsv1"; |
1735 | break; | 1735 | break; |
1736 | } | 1736 | } |
1737 | seq_printf(seq, ",jqfmt=%s", fmtname); | 1737 | seq_printf(seq, ",jqfmt=%s", fmtname); |
1738 | } | 1738 | } |
1739 | 1739 | ||
1740 | if (sbi->s_qf_names[USRQUOTA]) | 1740 | if (sbi->s_qf_names[USRQUOTA]) |
1741 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 1741 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
1742 | 1742 | ||
1743 | if (sbi->s_qf_names[GRPQUOTA]) | 1743 | if (sbi->s_qf_names[GRPQUOTA]) |
1744 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); | 1744 | seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); |
1745 | #endif | 1745 | #endif |
1746 | } | 1746 | } |
1747 | 1747 | ||
1748 | static const char *token2str(int token) | 1748 | static const char *token2str(int token) |
1749 | { | 1749 | { |
1750 | const struct match_token *t; | 1750 | const struct match_token *t; |
1751 | 1751 | ||
1752 | for (t = tokens; t->token != Opt_err; t++) | 1752 | for (t = tokens; t->token != Opt_err; t++) |
1753 | if (t->token == token && !strchr(t->pattern, '=')) | 1753 | if (t->token == token && !strchr(t->pattern, '=')) |
1754 | break; | 1754 | break; |
1755 | return t->pattern; | 1755 | return t->pattern; |
1756 | } | 1756 | } |
1757 | 1757 | ||
1758 | /* | 1758 | /* |
1759 | * Show an option if | 1759 | * Show an option if |
1760 | * - it's set to a non-default value OR | 1760 | * - it's set to a non-default value OR |
1761 |  * - the per-sb default is different from the global default | 1761 |  * - the per-sb default is different from the global default |
1762 | */ | 1762 | */ |
1763 | static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, | 1763 | static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, |
1764 | int nodefs) | 1764 | int nodefs) |
1765 | { | 1765 | { |
1766 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1766 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1767 | struct ext4_super_block *es = sbi->s_es; | 1767 | struct ext4_super_block *es = sbi->s_es; |
1768 | int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; | 1768 | int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; |
1769 | const struct mount_opts *m; | 1769 | const struct mount_opts *m; |
1770 | char sep = nodefs ? '\n' : ','; | 1770 | char sep = nodefs ? '\n' : ','; |
1771 | 1771 | ||
1772 | #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) | 1772 | #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) |
1773 | #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) | 1773 | #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) |
1774 | 1774 | ||
1775 | if (sbi->s_sb_block != 1) | 1775 | if (sbi->s_sb_block != 1) |
1776 | SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); | 1776 | SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); |
1777 | 1777 | ||
1778 | for (m = ext4_mount_opts; m->token != Opt_err; m++) { | 1778 | for (m = ext4_mount_opts; m->token != Opt_err; m++) { |
1779 | int want_set = m->flags & MOPT_SET; | 1779 | int want_set = m->flags & MOPT_SET; |
1780 | if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || | 1780 | if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || |
1781 | (m->flags & MOPT_CLEAR_ERR)) | 1781 | (m->flags & MOPT_CLEAR_ERR)) |
1782 | continue; | 1782 | continue; |
1783 | if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) | 1783 | if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) |
1784 | continue; /* skip if same as the default */ | 1784 | continue; /* skip if same as the default */ |
1785 | if ((want_set && | 1785 | if ((want_set && |
1786 | (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || | 1786 | (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || |
1787 | (!want_set && (sbi->s_mount_opt & m->mount_opt))) | 1787 | (!want_set && (sbi->s_mount_opt & m->mount_opt))) |
1788 | continue; /* select Opt_noFoo vs Opt_Foo */ | 1788 | continue; /* select Opt_noFoo vs Opt_Foo */ |
1789 | SEQ_OPTS_PRINT("%s", token2str(m->token)); | 1789 | SEQ_OPTS_PRINT("%s", token2str(m->token)); |
1790 | } | 1790 | } |
1791 | 1791 | ||
1792 | if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) || | 1792 | if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) || |
1793 | le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) | 1793 | le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) |
1794 | SEQ_OPTS_PRINT("resuid=%u", | 1794 | SEQ_OPTS_PRINT("resuid=%u", |
1795 | from_kuid_munged(&init_user_ns, sbi->s_resuid)); | 1795 | from_kuid_munged(&init_user_ns, sbi->s_resuid)); |
1796 | if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) || | 1796 | if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) || |
1797 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) | 1797 | le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) |
1798 | SEQ_OPTS_PRINT("resgid=%u", | 1798 | SEQ_OPTS_PRINT("resgid=%u", |
1799 | from_kgid_munged(&init_user_ns, sbi->s_resgid)); | 1799 | from_kgid_munged(&init_user_ns, sbi->s_resgid)); |
1800 | def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); | 1800 | def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); |
1801 | if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) | 1801 | if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) |
1802 | SEQ_OPTS_PUTS("errors=remount-ro"); | 1802 | SEQ_OPTS_PUTS("errors=remount-ro"); |
1803 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) | 1803 | if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) |
1804 | SEQ_OPTS_PUTS("errors=continue"); | 1804 | SEQ_OPTS_PUTS("errors=continue"); |
1805 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) | 1805 | if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) |
1806 | SEQ_OPTS_PUTS("errors=panic"); | 1806 | SEQ_OPTS_PUTS("errors=panic"); |
1807 | if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) | 1807 | if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) |
1808 | SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); | 1808 | SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); |
1809 | if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) | 1809 | if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) |
1810 | SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); | 1810 | SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); |
1811 | if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) | 1811 | if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) |
1812 | SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); | 1812 | SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); |
1813 | if (sb->s_flags & MS_I_VERSION) | 1813 | if (sb->s_flags & MS_I_VERSION) |
1814 | SEQ_OPTS_PUTS("i_version"); | 1814 | SEQ_OPTS_PUTS("i_version"); |
1815 | if (nodefs || sbi->s_stripe) | 1815 | if (nodefs || sbi->s_stripe) |
1816 | SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); | 1816 | SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); |
1817 | if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { | 1817 | if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { |
1818 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 1818 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
1819 | SEQ_OPTS_PUTS("data=journal"); | 1819 | SEQ_OPTS_PUTS("data=journal"); |
1820 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | 1820 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) |
1821 | SEQ_OPTS_PUTS("data=ordered"); | 1821 | SEQ_OPTS_PUTS("data=ordered"); |
1822 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 1822 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
1823 | SEQ_OPTS_PUTS("data=writeback"); | 1823 | SEQ_OPTS_PUTS("data=writeback"); |
1824 | } | 1824 | } |
1825 | if (nodefs || | 1825 | if (nodefs || |
1826 | sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | 1826 | sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) |
1827 | SEQ_OPTS_PRINT("inode_readahead_blks=%u", | 1827 | SEQ_OPTS_PRINT("inode_readahead_blks=%u", |
1828 | sbi->s_inode_readahead_blks); | 1828 | sbi->s_inode_readahead_blks); |
1829 | 1829 | ||
1830 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && | 1830 | if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && |
1831 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) | 1831 | (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) |
1832 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); | 1832 | SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); |
1833 | if (nodefs || sbi->s_max_dir_size_kb) | 1833 | if (nodefs || sbi->s_max_dir_size_kb) |
1834 | SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); | 1834 | SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); |
1835 | 1835 | ||
1836 | ext4_show_quota_options(seq, sb); | 1836 | ext4_show_quota_options(seq, sb); |
1837 | return 0; | 1837 | return 0; |
1838 | } | 1838 | } |
1839 | 1839 | ||
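The filter in the mount-option loop above relies on a small bit trick: XOR-ing the current option word with the default word sets a 1 exactly where the two disagree, so m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt) is non-zero only for options worth printing. A tiny self-contained illustration (the flag names are invented):

#include <stdio.h>

#define OPT_FOO 0x01    /* invented flags, for illustration only */
#define OPT_BAR 0x02

int main(void)
{
        unsigned int cur = OPT_FOO;             /* current mount options */
        unsigned int def = OPT_FOO | OPT_BAR;   /* per-sb defaults */

        /* cur ^ def has a bit set exactly where the two words disagree,
         * so only OPT_BAR is reported here. */
        if (OPT_FOO & (cur ^ def))
                printf("foo differs from the default\n");
        if (OPT_BAR & (cur ^ def))
                printf("bar differs from the default\n");
        return 0;
}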
1840 | static int ext4_show_options(struct seq_file *seq, struct dentry *root) | 1840 | static int ext4_show_options(struct seq_file *seq, struct dentry *root) |
1841 | { | 1841 | { |
1842 | return _ext4_show_options(seq, root->d_sb, 0); | 1842 | return _ext4_show_options(seq, root->d_sb, 0); |
1843 | } | 1843 | } |
1844 | 1844 | ||
1845 | static int options_seq_show(struct seq_file *seq, void *offset) | 1845 | static int options_seq_show(struct seq_file *seq, void *offset) |
1846 | { | 1846 | { |
1847 | struct super_block *sb = seq->private; | 1847 | struct super_block *sb = seq->private; |
1848 | int rc; | 1848 | int rc; |
1849 | 1849 | ||
1850 | seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); | 1850 | seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); |
1851 | rc = _ext4_show_options(seq, sb, 1); | 1851 | rc = _ext4_show_options(seq, sb, 1); |
1852 | seq_puts(seq, "\n"); | 1852 | seq_puts(seq, "\n"); |
1853 | return rc; | 1853 | return rc; |
1854 | } | 1854 | } |
1855 | 1855 | ||
1856 | static int options_open_fs(struct inode *inode, struct file *file) | 1856 | static int options_open_fs(struct inode *inode, struct file *file) |
1857 | { | 1857 | { |
1858 | return single_open(file, options_seq_show, PDE_DATA(inode)); | 1858 | return single_open(file, options_seq_show, PDE_DATA(inode)); |
1859 | } | 1859 | } |
1860 | 1860 | ||
1861 | static const struct file_operations ext4_seq_options_fops = { | 1861 | static const struct file_operations ext4_seq_options_fops = { |
1862 | .owner = THIS_MODULE, | 1862 | .owner = THIS_MODULE, |
1863 | .open = options_open_fs, | 1863 | .open = options_open_fs, |
1864 | .read = seq_read, | 1864 | .read = seq_read, |
1865 | .llseek = seq_lseek, | 1865 | .llseek = seq_lseek, |
1866 | .release = single_release, | 1866 | .release = single_release, |
1867 | }; | 1867 | }; |
1868 | 1868 | ||
1869 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | 1869 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, |
1870 | int read_only) | 1870 | int read_only) |
1871 | { | 1871 | { |
1872 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1872 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1873 | int res = 0; | 1873 | int res = 0; |
1874 | 1874 | ||
1875 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { | 1875 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { |
1876 | ext4_msg(sb, KERN_ERR, "revision level too high, " | 1876 | ext4_msg(sb, KERN_ERR, "revision level too high, " |
1877 | "forcing read-only mode"); | 1877 | "forcing read-only mode"); |
1878 | res = MS_RDONLY; | 1878 | res = MS_RDONLY; |
1879 | } | 1879 | } |
1880 | if (read_only) | 1880 | if (read_only) |
1881 | goto done; | 1881 | goto done; |
1882 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) | 1882 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) |
1883 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " | 1883 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " |
1884 | "running e2fsck is recommended"); | 1884 | "running e2fsck is recommended"); |
1885 | else if (sbi->s_mount_state & EXT4_ERROR_FS) | 1885 | else if (sbi->s_mount_state & EXT4_ERROR_FS) |
1886 | ext4_msg(sb, KERN_WARNING, | 1886 | ext4_msg(sb, KERN_WARNING, |
1887 | "warning: mounting fs with errors, " | 1887 | "warning: mounting fs with errors, " |
1888 | "running e2fsck is recommended"); | 1888 | "running e2fsck is recommended"); |
1889 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && | 1889 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && |
1890 | le16_to_cpu(es->s_mnt_count) >= | 1890 | le16_to_cpu(es->s_mnt_count) >= |
1891 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) | 1891 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) |
1892 | ext4_msg(sb, KERN_WARNING, | 1892 | ext4_msg(sb, KERN_WARNING, |
1893 | "warning: maximal mount count reached, " | 1893 | "warning: maximal mount count reached, " |
1894 | "running e2fsck is recommended"); | 1894 | "running e2fsck is recommended"); |
1895 | else if (le32_to_cpu(es->s_checkinterval) && | 1895 | else if (le32_to_cpu(es->s_checkinterval) && |
1896 | (le32_to_cpu(es->s_lastcheck) + | 1896 | (le32_to_cpu(es->s_lastcheck) + |
1897 | le32_to_cpu(es->s_checkinterval) <= get_seconds())) | 1897 | le32_to_cpu(es->s_checkinterval) <= get_seconds())) |
1898 | ext4_msg(sb, KERN_WARNING, | 1898 | ext4_msg(sb, KERN_WARNING, |
1899 | "warning: checktime reached, " | 1899 | "warning: checktime reached, " |
1900 | "running e2fsck is recommended"); | 1900 | "running e2fsck is recommended"); |
1901 | if (!sbi->s_journal) | 1901 | if (!sbi->s_journal) |
1902 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); | 1902 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); |
1903 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) | 1903 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) |
1904 | es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); | 1904 | es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); |
1905 | le16_add_cpu(&es->s_mnt_count, 1); | 1905 | le16_add_cpu(&es->s_mnt_count, 1); |
1906 | es->s_mtime = cpu_to_le32(get_seconds()); | 1906 | es->s_mtime = cpu_to_le32(get_seconds()); |
1907 | ext4_update_dynamic_rev(sb); | 1907 | ext4_update_dynamic_rev(sb); |
1908 | if (sbi->s_journal) | 1908 | if (sbi->s_journal) |
1909 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 1909 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
1910 | 1910 | ||
1911 | ext4_commit_super(sb, 1); | 1911 | ext4_commit_super(sb, 1); |
1912 | done: | 1912 | done: |
1913 | if (test_opt(sb, DEBUG)) | 1913 | if (test_opt(sb, DEBUG)) |
1914 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " | 1914 | printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " |
1915 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", | 1915 | "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", |
1916 | sb->s_blocksize, | 1916 | sb->s_blocksize, |
1917 | sbi->s_groups_count, | 1917 | sbi->s_groups_count, |
1918 | EXT4_BLOCKS_PER_GROUP(sb), | 1918 | EXT4_BLOCKS_PER_GROUP(sb), |
1919 | EXT4_INODES_PER_GROUP(sb), | 1919 | EXT4_INODES_PER_GROUP(sb), |
1920 | sbi->s_mount_opt, sbi->s_mount_opt2); | 1920 | sbi->s_mount_opt, sbi->s_mount_opt2); |
1921 | 1921 | ||
1922 | cleancache_init_fs(sb); | 1922 | cleancache_init_fs(sb); |
1923 | return res; | 1923 | return res; |
1924 | } | 1924 | } |
1925 | 1925 | ||
1926 | int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) | 1926 | int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) |
1927 | { | 1927 | { |
1928 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1928 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1929 | struct flex_groups *new_groups; | 1929 | struct flex_groups *new_groups; |
1930 | int size; | 1930 | int size; |
1931 | 1931 | ||
1932 | if (!sbi->s_log_groups_per_flex) | 1932 | if (!sbi->s_log_groups_per_flex) |
1933 | return 0; | 1933 | return 0; |
1934 | 1934 | ||
1935 | size = ext4_flex_group(sbi, ngroup - 1) + 1; | 1935 | size = ext4_flex_group(sbi, ngroup - 1) + 1; |
1936 | if (size <= sbi->s_flex_groups_allocated) | 1936 | if (size <= sbi->s_flex_groups_allocated) |
1937 | return 0; | 1937 | return 0; |
1938 | 1938 | ||
1939 | size = roundup_pow_of_two(size * sizeof(struct flex_groups)); | 1939 | size = roundup_pow_of_two(size * sizeof(struct flex_groups)); |
1940 | new_groups = ext4_kvzalloc(size, GFP_KERNEL); | 1940 | new_groups = ext4_kvzalloc(size, GFP_KERNEL); |
1941 | if (!new_groups) { | 1941 | if (!new_groups) { |
1942 | ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", | 1942 | ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", |
1943 | size / (int) sizeof(struct flex_groups)); | 1943 | size / (int) sizeof(struct flex_groups)); |
1944 | return -ENOMEM; | 1944 | return -ENOMEM; |
1945 | } | 1945 | } |
1946 | 1946 | ||
1947 | if (sbi->s_flex_groups) { | 1947 | if (sbi->s_flex_groups) { |
1948 | memcpy(new_groups, sbi->s_flex_groups, | 1948 | memcpy(new_groups, sbi->s_flex_groups, |
1949 | (sbi->s_flex_groups_allocated * | 1949 | (sbi->s_flex_groups_allocated * |
1950 | sizeof(struct flex_groups))); | 1950 | sizeof(struct flex_groups))); |
1951 | kvfree(sbi->s_flex_groups); | 1951 | kvfree(sbi->s_flex_groups); |
1952 | } | 1952 | } |
1953 | sbi->s_flex_groups = new_groups; | 1953 | sbi->s_flex_groups = new_groups; |
1954 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); | 1954 | sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); |
1955 | return 0; | 1955 | return 0; |
1956 | } | 1956 | } |
1957 | 1957 | ||
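ext4_alloc_flex_bg_array() grows its array by rounding the required byte count up to a power of two, allocating a zeroed replacement, copying the old contents, and freeing the old buffer, so repeated growth during online resize stays amortized-cheap. A user-space sketch of the same grow-by-powers-of-two pattern, with roundup_pow2() standing in for the kernel's roundup_pow_of_two():

#include <stdlib.h>
#include <string.h>

/* User-space stand-in for the kernel's roundup_pow_of_two(). */
static size_t roundup_pow2(size_t x)
{
        size_t p = 1;

        while (p < x)
                p <<= 1;
        return p;
}

/* Grow *arr (currently *allocated elements of size elem) so it can hold
 * at least want elements, zero-filling the tail -- the same steps the
 * function above applies to s_flex_groups. */
static int grow_array(void **arr, size_t *allocated, size_t elem, size_t want)
{
        size_t bytes;
        void *tmp;

        if (want <= *allocated)
                return 0;
        bytes = roundup_pow2(want * elem);
        tmp = calloc(1, bytes);
        if (!tmp)
                return -1;
        if (*arr) {
                memcpy(tmp, *arr, *allocated * elem);
                free(*arr);
        }
        *arr = tmp;
        *allocated = bytes / elem;
        return 0;
}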
1958 | static int ext4_fill_flex_info(struct super_block *sb) | 1958 | static int ext4_fill_flex_info(struct super_block *sb) |
1959 | { | 1959 | { |
1960 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1960 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1961 | struct ext4_group_desc *gdp = NULL; | 1961 | struct ext4_group_desc *gdp = NULL; |
1962 | ext4_group_t flex_group; | 1962 | ext4_group_t flex_group; |
1963 | int i, err; | 1963 | int i, err; |
1964 | 1964 | ||
1965 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1965 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
1966 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { | 1966 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { |
1967 | sbi->s_log_groups_per_flex = 0; | 1967 | sbi->s_log_groups_per_flex = 0; |
1968 | return 1; | 1968 | return 1; |
1969 | } | 1969 | } |
1970 | 1970 | ||
1971 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); | 1971 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
1972 | if (err) | 1972 | if (err) |
1973 | goto failed; | 1973 | goto failed; |
1974 | 1974 | ||
1975 | for (i = 0; i < sbi->s_groups_count; i++) { | 1975 | for (i = 0; i < sbi->s_groups_count; i++) { |
1976 | gdp = ext4_get_group_desc(sb, i, NULL); | 1976 | gdp = ext4_get_group_desc(sb, i, NULL); |
1977 | 1977 | ||
1978 | flex_group = ext4_flex_group(sbi, i); | 1978 | flex_group = ext4_flex_group(sbi, i); |
1979 | atomic_add(ext4_free_inodes_count(sb, gdp), | 1979 | atomic_add(ext4_free_inodes_count(sb, gdp), |
1980 | &sbi->s_flex_groups[flex_group].free_inodes); | 1980 | &sbi->s_flex_groups[flex_group].free_inodes); |
1981 | atomic64_add(ext4_free_group_clusters(sb, gdp), | 1981 | atomic64_add(ext4_free_group_clusters(sb, gdp), |
1982 | &sbi->s_flex_groups[flex_group].free_clusters); | 1982 | &sbi->s_flex_groups[flex_group].free_clusters); |
1983 | atomic_add(ext4_used_dirs_count(sb, gdp), | 1983 | atomic_add(ext4_used_dirs_count(sb, gdp), |
1984 | &sbi->s_flex_groups[flex_group].used_dirs); | 1984 | &sbi->s_flex_groups[flex_group].used_dirs); |
1985 | } | 1985 | } |
1986 | 1986 | ||
1987 | return 1; | 1987 | return 1; |
1988 | failed: | 1988 | failed: |
1989 | return 0; | 1989 | return 0; |
1990 | } | 1990 | } |
1991 | 1991 | ||
1992 | static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | 1992 | static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, |
1993 | struct ext4_group_desc *gdp) | 1993 | struct ext4_group_desc *gdp) |
1994 | { | 1994 | { |
1995 | int offset; | 1995 | int offset; |
1996 | __u16 crc = 0; | 1996 | __u16 crc = 0; |
1997 | __le32 le_group = cpu_to_le32(block_group); | 1997 | __le32 le_group = cpu_to_le32(block_group); |
1998 | 1998 | ||
1999 | if (ext4_has_metadata_csum(sbi->s_sb)) { | 1999 | if (ext4_has_metadata_csum(sbi->s_sb)) { |
2000 | /* Use new metadata_csum algorithm */ | 2000 | /* Use new metadata_csum algorithm */ |
2001 | __le16 save_csum; | 2001 | __le16 save_csum; |
2002 | __u32 csum32; | 2002 | __u32 csum32; |
2003 | 2003 | ||
2004 | save_csum = gdp->bg_checksum; | 2004 | save_csum = gdp->bg_checksum; |
2005 | gdp->bg_checksum = 0; | 2005 | gdp->bg_checksum = 0; |
2006 | csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, | 2006 | csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, |
2007 | sizeof(le_group)); | 2007 | sizeof(le_group)); |
2008 | csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, | 2008 | csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, |
2009 | sbi->s_desc_size); | 2009 | sbi->s_desc_size); |
2010 | gdp->bg_checksum = save_csum; | 2010 | gdp->bg_checksum = save_csum; |
2011 | 2011 | ||
2012 | crc = csum32 & 0xFFFF; | 2012 | crc = csum32 & 0xFFFF; |
2013 | goto out; | 2013 | goto out; |
2014 | } | 2014 | } |
2015 | 2015 | ||
2016 | /* old crc16 code */ | 2016 | /* old crc16 code */ |
2017 | if (!(sbi->s_es->s_feature_ro_compat & | 2017 | if (!(sbi->s_es->s_feature_ro_compat & |
2018 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) | 2018 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) |
2019 | return 0; | 2019 | return 0; |
2020 | 2020 | ||
2021 | offset = offsetof(struct ext4_group_desc, bg_checksum); | 2021 | offset = offsetof(struct ext4_group_desc, bg_checksum); |
2022 | 2022 | ||
2023 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); | 2023 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); |
2024 | crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); | 2024 | crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); |
2025 | crc = crc16(crc, (__u8 *)gdp, offset); | 2025 | crc = crc16(crc, (__u8 *)gdp, offset); |
2026 | offset += sizeof(gdp->bg_checksum); /* skip checksum */ | 2026 | offset += sizeof(gdp->bg_checksum); /* skip checksum */ |
2027 | 		/* for 64-bit descriptors, checksum the rest of struct ext4_group_desc */ | 2027 | 		/* for 64-bit descriptors, checksum the rest of struct ext4_group_desc */ |
2028 | if ((sbi->s_es->s_feature_incompat & | 2028 | if ((sbi->s_es->s_feature_incompat & |
2029 | cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && | 2029 | cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && |
2030 | offset < le16_to_cpu(sbi->s_es->s_desc_size)) | 2030 | offset < le16_to_cpu(sbi->s_es->s_desc_size)) |
2031 | crc = crc16(crc, (__u8 *)gdp + offset, | 2031 | crc = crc16(crc, (__u8 *)gdp + offset, |
2032 | le16_to_cpu(sbi->s_es->s_desc_size) - | 2032 | le16_to_cpu(sbi->s_es->s_desc_size) - |
2033 | offset); | 2033 | offset); |
2034 | 2034 | ||
2035 | out: | 2035 | out: |
2036 | return cpu_to_le16(crc); | 2036 | return cpu_to_le16(crc); |
2037 | } | 2037 | } |
2038 | 2038 | ||
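Both checksum paths above share one idiom: the descriptor's bg_checksum field must not feed into the checksum that will be stored in it, so it is saved, zeroed for the computation, and restored afterwards. A user-space sketch of that idiom, using zlib's crc32() purely as a stand-in for the kernel's ext4_chksum()/crc16() helpers:

#include <stdint.h>
#include <zlib.h>   /* link with -lz; crc32() stands in for ext4_chksum() */

struct record {
        uint32_t id;
        uint16_t checksum;      /* must not feed into its own checksum */
        uint16_t flags;
};

static uint16_t record_csum(struct record *r)
{
        uint16_t save = r->checksum;
        uint32_t crc;

        /* Save, zero, checksum the whole struct, restore -- the same
         * dance done with gdp->bg_checksum above. (A real on-disk
         * format would also pin down struct layout and padding.) */
        r->checksum = 0;
        crc = (uint32_t)crc32(0L, (const unsigned char *)r, sizeof(*r));
        r->checksum = save;
        return (uint16_t)(crc & 0xFFFF);
}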
2039 | int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, | 2039 | int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, |
2040 | struct ext4_group_desc *gdp) | 2040 | struct ext4_group_desc *gdp) |
2041 | { | 2041 | { |
2042 | if (ext4_has_group_desc_csum(sb) && | 2042 | if (ext4_has_group_desc_csum(sb) && |
2043 | (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), | 2043 | (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), |
2044 | block_group, gdp))) | 2044 | block_group, gdp))) |
2045 | return 0; | 2045 | return 0; |
2046 | 2046 | ||
2047 | return 1; | 2047 | return 1; |
2048 | } | 2048 | } |
2049 | 2049 | ||
2050 | void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, | 2050 | void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, |
2051 | struct ext4_group_desc *gdp) | 2051 | struct ext4_group_desc *gdp) |
2052 | { | 2052 | { |
2053 | if (!ext4_has_group_desc_csum(sb)) | 2053 | if (!ext4_has_group_desc_csum(sb)) |
2054 | return; | 2054 | return; |
2055 | gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); | 2055 | gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); |
2056 | } | 2056 | } |
2057 | 2057 | ||
2058 | /* Called at mount-time, super-block is locked */ | 2058 | /* Called at mount-time, super-block is locked */ |
2059 | static int ext4_check_descriptors(struct super_block *sb, | 2059 | static int ext4_check_descriptors(struct super_block *sb, |
2060 | ext4_group_t *first_not_zeroed) | 2060 | ext4_group_t *first_not_zeroed) |
2061 | { | 2061 | { |
2062 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2062 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2063 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); | 2063 | ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); |
2064 | ext4_fsblk_t last_block; | 2064 | ext4_fsblk_t last_block; |
2065 | ext4_fsblk_t block_bitmap; | 2065 | ext4_fsblk_t block_bitmap; |
2066 | ext4_fsblk_t inode_bitmap; | 2066 | ext4_fsblk_t inode_bitmap; |
2067 | ext4_fsblk_t inode_table; | 2067 | ext4_fsblk_t inode_table; |
2068 | int flexbg_flag = 0; | 2068 | int flexbg_flag = 0; |
2069 | ext4_group_t i, grp = sbi->s_groups_count; | 2069 | ext4_group_t i, grp = sbi->s_groups_count; |
2070 | 2070 | ||
2071 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 2071 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
2072 | flexbg_flag = 1; | 2072 | flexbg_flag = 1; |
2073 | 2073 | ||
2074 | ext4_debug("Checking group descriptors"); | 2074 | ext4_debug("Checking group descriptors"); |
2075 | 2075 | ||
2076 | for (i = 0; i < sbi->s_groups_count; i++) { | 2076 | for (i = 0; i < sbi->s_groups_count; i++) { |
2077 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 2077 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
2078 | 2078 | ||
2079 | if (i == sbi->s_groups_count - 1 || flexbg_flag) | 2079 | if (i == sbi->s_groups_count - 1 || flexbg_flag) |
2080 | last_block = ext4_blocks_count(sbi->s_es) - 1; | 2080 | last_block = ext4_blocks_count(sbi->s_es) - 1; |
2081 | else | 2081 | else |
2082 | last_block = first_block + | 2082 | last_block = first_block + |
2083 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); | 2083 | (EXT4_BLOCKS_PER_GROUP(sb) - 1); |
2084 | 2084 | ||
2085 | if ((grp == sbi->s_groups_count) && | 2085 | if ((grp == sbi->s_groups_count) && |
2086 | !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | 2086 | !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) |
2087 | grp = i; | 2087 | grp = i; |
2088 | 2088 | ||
2089 | block_bitmap = ext4_block_bitmap(sb, gdp); | 2089 | block_bitmap = ext4_block_bitmap(sb, gdp); |
2090 | if (block_bitmap < first_block || block_bitmap > last_block) { | 2090 | if (block_bitmap < first_block || block_bitmap > last_block) { |
2091 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2091 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2092 | "Block bitmap for group %u not in group " | 2092 | "Block bitmap for group %u not in group " |
2093 | "(block %llu)!", i, block_bitmap); | 2093 | "(block %llu)!", i, block_bitmap); |
2094 | return 0; | 2094 | return 0; |
2095 | } | 2095 | } |
2096 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 2096 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
2097 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 2097 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
2098 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2098 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2099 | "Inode bitmap for group %u not in group " | 2099 | "Inode bitmap for group %u not in group " |
2100 | "(block %llu)!", i, inode_bitmap); | 2100 | "(block %llu)!", i, inode_bitmap); |
2101 | return 0; | 2101 | return 0; |
2102 | } | 2102 | } |
2103 | inode_table = ext4_inode_table(sb, gdp); | 2103 | inode_table = ext4_inode_table(sb, gdp); |
2104 | if (inode_table < first_block || | 2104 | if (inode_table < first_block || |
2105 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 2105 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
2106 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2106 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2107 | "Inode table for group %u not in group " | 2107 | "Inode table for group %u not in group " |
2108 | "(block %llu)!", i, inode_table); | 2108 | "(block %llu)!", i, inode_table); |
2109 | return 0; | 2109 | return 0; |
2110 | } | 2110 | } |
2111 | ext4_lock_group(sb, i); | 2111 | ext4_lock_group(sb, i); |
2112 | if (!ext4_group_desc_csum_verify(sb, i, gdp)) { | 2112 | if (!ext4_group_desc_csum_verify(sb, i, gdp)) { |
2113 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2113 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2114 | "Checksum for group %u failed (%u!=%u)", | 2114 | "Checksum for group %u failed (%u!=%u)", |
2115 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 2115 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
2116 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 2116 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
2117 | if (!(sb->s_flags & MS_RDONLY)) { | 2117 | if (!(sb->s_flags & MS_RDONLY)) { |
2118 | ext4_unlock_group(sb, i); | 2118 | ext4_unlock_group(sb, i); |
2119 | return 0; | 2119 | return 0; |
2120 | } | 2120 | } |
2121 | } | 2121 | } |
2122 | ext4_unlock_group(sb, i); | 2122 | ext4_unlock_group(sb, i); |
2123 | if (!flexbg_flag) | 2123 | if (!flexbg_flag) |
2124 | first_block += EXT4_BLOCKS_PER_GROUP(sb); | 2124 | first_block += EXT4_BLOCKS_PER_GROUP(sb); |
2125 | } | 2125 | } |
2126 | if (NULL != first_not_zeroed) | 2126 | if (NULL != first_not_zeroed) |
2127 | *first_not_zeroed = grp; | 2127 | *first_not_zeroed = grp; |
2128 | return 1; | 2128 | return 1; |
2129 | } | 2129 | } |
2130 | 2130 | ||
2131 | /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at | 2131 | /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at |
2132 | * the superblock) which were deleted from all directories, but held open by | 2132 | * the superblock) which were deleted from all directories, but held open by |
2133 | * a process at the time of a crash. We walk the list and try to delete these | 2133 | * a process at the time of a crash. We walk the list and try to delete these |
2134 | * inodes at recovery time (only with a read-write filesystem). | 2134 | * inodes at recovery time (only with a read-write filesystem). |
2135 | * | 2135 | * |
2136 | * In order to keep the orphan inode chain consistent during traversal (in | 2136 | * In order to keep the orphan inode chain consistent during traversal (in |
2137 | * case of crash during recovery), we link each inode into the superblock | 2137 | * case of crash during recovery), we link each inode into the superblock |
2138 | * orphan list_head and handle it the same way as an inode deletion during | 2138 | * orphan list_head and handle it the same way as an inode deletion during |
2139 | * normal operation (which journals the operations for us). | 2139 | * normal operation (which journals the operations for us). |
2140 | * | 2140 | * |
2141 | * We only do an iget() and an iput() on each inode, which is very safe if we | 2141 | * We only do an iget() and an iput() on each inode, which is very safe if we |
2142 | * accidentally point at an in-use or already deleted inode. The worst that | 2142 | * accidentally point at an in-use or already deleted inode. The worst that |
2143 | * can happen in this case is that we get a "bit already cleared" message from | 2143 | * can happen in this case is that we get a "bit already cleared" message from |
2144 | * ext4_free_inode(). The only reason we would point at a wrong inode is if | 2144 | * ext4_free_inode(). The only reason we would point at a wrong inode is if |
2145 | * e2fsck was run on this filesystem, and it must have already done the orphan | 2145 | * e2fsck was run on this filesystem, and it must have already done the orphan |
2146 | * inode cleanup for us, so we can safely abort without any further action. | 2146 | * inode cleanup for us, so we can safely abort without any further action. |
2147 | */ | 2147 | */ |
2148 | static void ext4_orphan_cleanup(struct super_block *sb, | 2148 | static void ext4_orphan_cleanup(struct super_block *sb, |
2149 | struct ext4_super_block *es) | 2149 | struct ext4_super_block *es) |
2150 | { | 2150 | { |
2151 | unsigned int s_flags = sb->s_flags; | 2151 | unsigned int s_flags = sb->s_flags; |
2152 | int nr_orphans = 0, nr_truncates = 0; | 2152 | int nr_orphans = 0, nr_truncates = 0; |
2153 | #ifdef CONFIG_QUOTA | 2153 | #ifdef CONFIG_QUOTA |
2154 | int i; | 2154 | int i; |
2155 | #endif | 2155 | #endif |
2156 | if (!es->s_last_orphan) { | 2156 | if (!es->s_last_orphan) { |
2157 | jbd_debug(4, "no orphan inodes to clean up\n"); | 2157 | jbd_debug(4, "no orphan inodes to clean up\n"); |
2158 | return; | 2158 | return; |
2159 | } | 2159 | } |
2160 | 2160 | ||
2161 | if (bdev_read_only(sb->s_bdev)) { | 2161 | if (bdev_read_only(sb->s_bdev)) { |
2162 | ext4_msg(sb, KERN_ERR, "write access " | 2162 | ext4_msg(sb, KERN_ERR, "write access " |
2163 | "unavailable, skipping orphan cleanup"); | 2163 | "unavailable, skipping orphan cleanup"); |
2164 | return; | 2164 | return; |
2165 | } | 2165 | } |
2166 | 2166 | ||
2167 | /* Check if feature set would not allow a r/w mount */ | 2167 | /* Check if feature set would not allow a r/w mount */ |
2168 | if (!ext4_feature_set_ok(sb, 0)) { | 2168 | if (!ext4_feature_set_ok(sb, 0)) { |
2169 | ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " | 2169 | ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " |
2170 | "unknown ROCOMPAT features"); | 2170 | "unknown ROCOMPAT features"); |
2171 | return; | 2171 | return; |
2172 | } | 2172 | } |
2173 | 2173 | ||
2174 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2174 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2175 | /* don't clear list on RO mount w/ errors */ | 2175 | /* don't clear list on RO mount w/ errors */ |
2176 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { | 2176 | if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { |
2177 | ext4_msg(sb, KERN_INFO, "Errors on filesystem, " | 2177 | ext4_msg(sb, KERN_INFO, "Errors on filesystem, " |
2178 | "clearing orphan list.\n"); | 2178 | "clearing orphan list.\n"); |
2179 | es->s_last_orphan = 0; | 2179 | es->s_last_orphan = 0; |
2180 | } | 2180 | } |
2181 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); | 2181 | jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); |
2182 | return; | 2182 | return; |
2183 | } | 2183 | } |
2184 | 2184 | ||
2185 | if (s_flags & MS_RDONLY) { | 2185 | if (s_flags & MS_RDONLY) { |
2186 | ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); | 2186 | ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); |
2187 | sb->s_flags &= ~MS_RDONLY; | 2187 | sb->s_flags &= ~MS_RDONLY; |
2188 | } | 2188 | } |
2189 | #ifdef CONFIG_QUOTA | 2189 | #ifdef CONFIG_QUOTA |
2190 | /* Needed for iput() to work correctly and not trash data */ | 2190 | /* Needed for iput() to work correctly and not trash data */ |
2191 | sb->s_flags |= MS_ACTIVE; | 2191 | sb->s_flags |= MS_ACTIVE; |
2192 | /* Turn on quotas so that they are updated correctly */ | 2192 | /* Turn on quotas so that they are updated correctly */ |
2193 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { | 2193 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
2194 | if (EXT4_SB(sb)->s_qf_names[i]) { | 2194 | if (EXT4_SB(sb)->s_qf_names[i]) { |
2195 | int ret = ext4_quota_on_mount(sb, i); | 2195 | int ret = ext4_quota_on_mount(sb, i); |
2196 | if (ret < 0) | 2196 | if (ret < 0) |
2197 | ext4_msg(sb, KERN_ERR, | 2197 | ext4_msg(sb, KERN_ERR, |
2198 | "Cannot turn on journaled " | 2198 | "Cannot turn on journaled " |
2199 | "quota: error %d", ret); | 2199 | "quota: error %d", ret); |
2200 | } | 2200 | } |
2201 | } | 2201 | } |
2202 | #endif | 2202 | #endif |
2203 | 2203 | ||
2204 | while (es->s_last_orphan) { | 2204 | while (es->s_last_orphan) { |
2205 | struct inode *inode; | 2205 | struct inode *inode; |
2206 | 2206 | ||
2207 | inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); | 2207 | inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); |
2208 | if (IS_ERR(inode)) { | 2208 | if (IS_ERR(inode)) { |
2209 | es->s_last_orphan = 0; | 2209 | es->s_last_orphan = 0; |
2210 | break; | 2210 | break; |
2211 | } | 2211 | } |
2212 | 2212 | ||
2213 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2213 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
2214 | dquot_initialize(inode); | 2214 | dquot_initialize(inode); |
2215 | if (inode->i_nlink) { | 2215 | if (inode->i_nlink) { |
2216 | if (test_opt(sb, DEBUG)) | 2216 | if (test_opt(sb, DEBUG)) |
2217 | ext4_msg(sb, KERN_DEBUG, | 2217 | ext4_msg(sb, KERN_DEBUG, |
2218 | "%s: truncating inode %lu to %lld bytes", | 2218 | "%s: truncating inode %lu to %lld bytes", |
2219 | __func__, inode->i_ino, inode->i_size); | 2219 | __func__, inode->i_ino, inode->i_size); |
2220 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 2220 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
2221 | inode->i_ino, inode->i_size); | 2221 | inode->i_ino, inode->i_size); |
2222 | mutex_lock(&inode->i_mutex); | 2222 | mutex_lock(&inode->i_mutex); |
2223 | truncate_inode_pages(inode->i_mapping, inode->i_size); | 2223 | truncate_inode_pages(inode->i_mapping, inode->i_size); |
2224 | ext4_truncate(inode); | 2224 | ext4_truncate(inode); |
2225 | mutex_unlock(&inode->i_mutex); | 2225 | mutex_unlock(&inode->i_mutex); |
2226 | nr_truncates++; | 2226 | nr_truncates++; |
2227 | } else { | 2227 | } else { |
2228 | if (test_opt(sb, DEBUG)) | 2228 | if (test_opt(sb, DEBUG)) |
2229 | ext4_msg(sb, KERN_DEBUG, | 2229 | ext4_msg(sb, KERN_DEBUG, |
2230 | "%s: deleting unreferenced inode %lu", | 2230 | "%s: deleting unreferenced inode %lu", |
2231 | __func__, inode->i_ino); | 2231 | __func__, inode->i_ino); |
2232 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 2232 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
2233 | inode->i_ino); | 2233 | inode->i_ino); |
2234 | nr_orphans++; | 2234 | nr_orphans++; |
2235 | } | 2235 | } |
2236 | iput(inode); /* The delete magic happens here! */ | 2236 | iput(inode); /* The delete magic happens here! */ |
2237 | } | 2237 | } |
2238 | 2238 | ||
2239 | #define PLURAL(x) (x), ((x) == 1) ? "" : "s" | 2239 | #define PLURAL(x) (x), ((x) == 1) ? "" : "s" |
2240 | 2240 | ||
2241 | if (nr_orphans) | 2241 | if (nr_orphans) |
2242 | ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", | 2242 | ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", |
2243 | PLURAL(nr_orphans)); | 2243 | PLURAL(nr_orphans)); |
2244 | if (nr_truncates) | 2244 | if (nr_truncates) |
2245 | ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", | 2245 | ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", |
2246 | PLURAL(nr_truncates)); | 2246 | PLURAL(nr_truncates)); |
2247 | #ifdef CONFIG_QUOTA | 2247 | #ifdef CONFIG_QUOTA |
2248 | /* Turn quotas off */ | 2248 | /* Turn quotas off */ |
2249 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { | 2249 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
2250 | if (sb_dqopt(sb)->files[i]) | 2250 | if (sb_dqopt(sb)->files[i]) |
2251 | dquot_quota_off(sb, i); | 2251 | dquot_quota_off(sb, i); |
2252 | } | 2252 | } |
2253 | #endif | 2253 | #endif |
2254 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ | 2254 | sb->s_flags = s_flags; /* Restore MS_RDONLY status */ |
2255 | } | 2255 | } |
2256 | 2256 | ||
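Note that the loop above never follows next-pointers itself: the final iput() is what deletes the inode, removes it from the on-disk chain, and so advances es->s_last_orphan, which is why a crash mid-cleanup leaves a shorter but still consistent list. The same consume-from-the-head shape as a user-space sketch:

#include <stdio.h>
#include <stdlib.h>

struct orphan {
        int ino;
        struct orphan *next;
};

/* Consume strictly from the head: processing a node is what advances
 * the head, so an interruption mid-walk leaves a shorter but still
 * valid list -- the property the kernel loop gets from iput()
 * updating es->s_last_orphan. */
static void cleanup(struct orphan **head)
{
        while (*head) {
                struct orphan *o = *head;

                printf("deleting orphan inode %d\n", o->ino);
                *head = o->next;        /* advance before freeing */
                free(o);
        }
}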
2257 | /* | 2257 | /* |
2258 | * Maximal extent format file size. | 2258 | * Maximal extent format file size. |
2259 | * Resulting logical blkno at s_maxbytes must fit in our on-disk | 2259 | * Resulting logical blkno at s_maxbytes must fit in our on-disk |
2260 | * extent format containers, within a sector_t, and within i_blocks | 2260 | * extent format containers, within a sector_t, and within i_blocks |
2261 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, | 2261 | * in the vfs. ext4 inode has 48 bits of i_block in fsblock units, |
2262 | * so that won't be a limiting factor. | 2262 | * so that won't be a limiting factor. |
2263 | * | 2263 | * |
2264 |  * However, there is another limiting factor. We store extents as a | 2264 |  * However, there is another limiting factor. We store extents as a |
2265 |  * starting block plus a length, so the length of the extent covering | 2265 |  * starting block plus a length, so the length of the extent covering |
2266 |  * the maximum file size must also fit into the on-disk format | 2266 |  * the maximum file size must also fit into the on-disk format |
2267 |  * containers. Given that a length is always one unit bigger than the | 2267 |  * containers. Given that a length is always one unit bigger than the |
2268 |  * largest block offset (block 0 counts as well), we lower s_maxbytes by one fs block. | 2268 |  * largest block offset (block 0 counts as well), we lower s_maxbytes by one fs block. |
2269 | * | 2269 | * |
2270 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 2270 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
2271 | */ | 2271 | */ |
2272 | static loff_t ext4_max_size(int blkbits, int has_huge_files) | 2272 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
2273 | { | 2273 | { |
2274 | loff_t res; | 2274 | loff_t res; |
2275 | loff_t upper_limit = MAX_LFS_FILESIZE; | 2275 | loff_t upper_limit = MAX_LFS_FILESIZE; |
2276 | 2276 | ||
2277 | /* small i_blocks in vfs inode? */ | 2277 | /* small i_blocks in vfs inode? */ |
2278 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { | 2278 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
2279 | /* | 2279 | /* |
2280 | 		 * !has_huge_files or CONFIG_LBDAF not enabled implies that | 2280 | 		 * !has_huge_files or CONFIG_LBDAF not enabled implies that |
2281 | 		 * the inode i_blocks field counts the file in 512-byte sectors; | 2281 | 		 * the inode i_blocks field counts the file in 512-byte sectors; |
2282 | 		 * 32 == size of the vfs inode i_blocks field * 8 | 2282 | 		 * 32 == size of the vfs inode i_blocks field * 8 |
2283 | */ | 2283 | */ |
2284 | upper_limit = (1LL << 32) - 1; | 2284 | upper_limit = (1LL << 32) - 1; |
2285 | 2285 | ||
2286 | /* total blocks in file system block size */ | 2286 | /* total blocks in file system block size */ |
2287 | upper_limit >>= (blkbits - 9); | 2287 | upper_limit >>= (blkbits - 9); |
2288 | upper_limit <<= blkbits; | 2288 | upper_limit <<= blkbits; |
2289 | } | 2289 | } |
2290 | 2290 | ||
2291 | /* | 2291 | /* |
2292 | * 32-bit extent-start container, ee_block. We lower the maxbytes | 2292 | * 32-bit extent-start container, ee_block. We lower the maxbytes |
2293 | * by one fs block, so ee_len can cover the extent of maximum file | 2293 | * by one fs block, so ee_len can cover the extent of maximum file |
2294 | * size | 2294 | * size |
2295 | */ | 2295 | */ |
2296 | res = (1LL << 32) - 1; | 2296 | res = (1LL << 32) - 1; |
2297 | res <<= blkbits; | 2297 | res <<= blkbits; |
2298 | 2298 | ||
2299 | /* Sanity check against vm- & vfs- imposed limits */ | 2299 | /* Sanity check against vm- & vfs- imposed limits */ |
2300 | if (res > upper_limit) | 2300 | if (res > upper_limit) |
2301 | res = upper_limit; | 2301 | res = upper_limit; |
2302 | 2302 | ||
2303 | return res; | 2303 | return res; |
2304 | } | 2304 | } |
2305 | 2305 | ||
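As a worked example (not part of the source): with 4 KiB blocks, blkbits is 12, so res = (2^32 - 1) << 12 = 2^44 - 2^12 bytes, just under 16 TiB. With has_huge_files and a 64-bit blkcnt_t, upper_limit remains MAX_LFS_FILESIZE and does not clamp the result, so the 32-bit extent-start container is the binding limit.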
2306 | /* | 2306 | /* |
2307 | * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect | 2307 | * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect |
2308 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. | 2308 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. |
2309 | * We need to be 1 filesystem block less than the 2^48 sector limit. | 2309 | * We need to be 1 filesystem block less than the 2^48 sector limit. |
2310 | */ | 2310 | */ |
2311 | static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) | 2311 | static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) |
2312 | { | 2312 | { |
2313 | loff_t res = EXT4_NDIR_BLOCKS; | 2313 | loff_t res = EXT4_NDIR_BLOCKS; |
2314 | int meta_blocks; | 2314 | int meta_blocks; |
2315 | loff_t upper_limit; | 2315 | loff_t upper_limit; |
2316 | /* This is calculated to be the largest file size for a dense, block | 2316 | /* This is calculated to be the largest file size for a dense, block |
2317 | * mapped file such that the file's total number of 512-byte sectors, | 2317 | * mapped file such that the file's total number of 512-byte sectors, |
2318 | * including data and all indirect blocks, does not exceed (2^48 - 1). | 2318 | * including data and all indirect blocks, does not exceed (2^48 - 1). |
2319 | * | 2319 | * |
2320 |  * __u32 i_blocks_lo and __u16 i_blocks_high represent the total | 2320 |  * __u32 i_blocks_lo and __u16 i_blocks_high represent the total |
2321 | * number of 512-byte sectors of the file. | 2321 | * number of 512-byte sectors of the file. |
2322 | */ | 2322 | */ |
2323 | 2323 | ||
2324 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { | 2324 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
2325 | /* | 2325 | /* |
2326 | * !has_huge_files or CONFIG_LBDAF not enabled implies that | 2326 | * !has_huge_files or CONFIG_LBDAF not enabled implies that |
2327 | * the inode i_block field represents total file blocks in | 2327 | * the inode i_block field represents total file blocks in |
2328 | * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 | 2328 | * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 |
2329 | */ | 2329 | */ |
2330 | upper_limit = (1LL << 32) - 1; | 2330 | upper_limit = (1LL << 32) - 1; |
2331 | 2331 | ||
2332 | /* total blocks in file system block size */ | 2332 | /* total blocks in file system block size */ |
2333 | upper_limit >>= (bits - 9); | 2333 | upper_limit >>= (bits - 9); |
2334 | 2334 | ||
2335 | } else { | 2335 | } else { |
2336 | /* | 2336 | /* |
2337 | * We use 48 bit ext4_inode i_blocks | 2337 | * We use 48 bit ext4_inode i_blocks |
2338 | * With EXT4_HUGE_FILE_FL set the i_blocks | 2338 | * With EXT4_HUGE_FILE_FL set the i_blocks |
2339 | * represent total number of blocks in | 2339 | * represent total number of blocks in |
2340 | * file system block size | 2340 | * file system block size |
2341 | */ | 2341 | */ |
2342 | upper_limit = (1LL << 48) - 1; | 2342 | upper_limit = (1LL << 48) - 1; |
2343 | 2343 | ||
2344 | } | 2344 | } |
2345 | 2345 | ||
2346 | /* indirect blocks */ | 2346 | /* indirect blocks */ |
2347 | meta_blocks = 1; | 2347 | meta_blocks = 1; |
2348 | /* double indirect blocks */ | 2348 | /* double indirect blocks */ |
2349 | meta_blocks += 1 + (1LL << (bits-2)); | 2349 | meta_blocks += 1 + (1LL << (bits-2)); |
2350 | 	/* triple indirect blocks */ | 2350 | 	/* triple indirect blocks */ |
2351 | meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); | 2351 | meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); |
2352 | 2352 | ||
2353 | upper_limit -= meta_blocks; | 2353 | upper_limit -= meta_blocks; |
2354 | upper_limit <<= bits; | 2354 | upper_limit <<= bits; |
2355 | 2355 | ||
2356 | res += 1LL << (bits-2); | 2356 | res += 1LL << (bits-2); |
2357 | res += 1LL << (2*(bits-2)); | 2357 | res += 1LL << (2*(bits-2)); |
2358 | res += 1LL << (3*(bits-2)); | 2358 | res += 1LL << (3*(bits-2)); |
2359 | res <<= bits; | 2359 | res <<= bits; |
2360 | if (res > upper_limit) | 2360 | if (res > upper_limit) |
2361 | res = upper_limit; | 2361 | res = upper_limit; |
2362 | 2362 | ||
2363 | if (res > MAX_LFS_FILESIZE) | 2363 | if (res > MAX_LFS_FILESIZE) |
2364 | res = MAX_LFS_FILESIZE; | 2364 | res = MAX_LFS_FILESIZE; |
2365 | 2365 | ||
2366 | return res; | 2366 | return res; |
2367 | } | 2367 | } |
2368 | 2368 | ||
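A worked example (again not from the source): for bits = 12 with huge files enabled, res = 12 + 2^10 + 2^20 + 2^30 blocks, roughly 2^30 blocks, which shifted left by 12 comes to about 4 TiB; upper_limit = ((2^48 - 1) - meta_blocks) << 12 is on the order of 2^60 bytes, far larger, so for 4 KiB blocks the indirect-map geometry rather than i_blocks is what limits a bitmap-mapped file.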
2369 | static ext4_fsblk_t descriptor_loc(struct super_block *sb, | 2369 | static ext4_fsblk_t descriptor_loc(struct super_block *sb, |
2370 | ext4_fsblk_t logical_sb_block, int nr) | 2370 | ext4_fsblk_t logical_sb_block, int nr) |
2371 | { | 2371 | { |
2372 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2372 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2373 | ext4_group_t bg, first_meta_bg; | 2373 | ext4_group_t bg, first_meta_bg; |
2374 | int has_super = 0; | 2374 | int has_super = 0; |
2375 | 2375 | ||
2376 | first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); | 2376 | first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); |
2377 | 2377 | ||
2378 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || | 2378 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || |
2379 | nr < first_meta_bg) | 2379 | nr < first_meta_bg) |
2380 | return logical_sb_block + nr + 1; | 2380 | return logical_sb_block + nr + 1; |
2381 | bg = sbi->s_desc_per_block * nr; | 2381 | bg = sbi->s_desc_per_block * nr; |
2382 | if (ext4_bg_has_super(sb, bg)) | 2382 | if (ext4_bg_has_super(sb, bg)) |
2383 | has_super = 1; | 2383 | has_super = 1; |
2384 | 2384 | ||
2385 | /* | 2385 | /* |
2386 | * If we have a meta_bg fs with 1k blocks, group 0's GDT is at | 2386 | * If we have a meta_bg fs with 1k blocks, group 0's GDT is at |
2387 | * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled | 2387 | * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled |
2388 | * on modern mke2fs or blksize > 1k on older mke2fs) then we must | 2388 | * on modern mke2fs or blksize > 1k on older mke2fs) then we must |
2389 | * compensate. | 2389 | * compensate. |
2390 | */ | 2390 | */ |
2391 | if (sb->s_blocksize == 1024 && nr == 0 && | 2391 | if (sb->s_blocksize == 1024 && nr == 0 && |
2392 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) | 2392 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) |
2393 | has_super++; | 2393 | has_super++; |
2394 | 2394 | ||
2395 | return (has_super + ext4_group_first_block_no(sb, bg)); | 2395 | return (has_super + ext4_group_first_block_no(sb, bg)); |
2396 | } | 2396 | } |
2397 | 2397 | ||
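Concretely (a hedged example, not from the source): without META_BG, or for nr below s_first_meta_bg, backup descriptor block nr sits right behind the backup superblock, at logical_sb_block + nr + 1. With META_BG, 4 KiB blocks and 64-byte descriptors would give s_desc_per_block = 64, so copy nr lives in group 64 * nr, at that group's first block plus one when the group carries a superblock backup.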
2398 | /** | 2398 | /** |
2399 | * ext4_get_stripe_size: Get the stripe size. | 2399 | * ext4_get_stripe_size: Get the stripe size. |
2400 | * @sbi: In memory super block info | 2400 | * @sbi: In memory super block info |
2401 | * | 2401 | * |
2402 |  * If the stripe size was specified via a mount option, then | 2402 |  * If the stripe size was specified via a mount option, then |
2403 |  * use the mount option value. If the value specified at mount time is | 2403 |  * use the mount option value. If the value specified at mount time is |
2404 |  * greater than the blocks per group, use the super block value. | 2404 |  * greater than the blocks per group, use the super block value. |
2405 |  * If the super block value is also greater than the blocks per group, return 0. | 2405 |  * If the super block value is also greater than the blocks per group, return 0. |
2406 |  * The allocator needs it to be less than the blocks per group. | 2406 |  * The allocator needs it to be less than the blocks per group. |
2407 | * | 2407 | * |
2408 | */ | 2408 | */ |
2409 | static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | 2409 | static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) |
2410 | { | 2410 | { |
2411 | unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); | 2411 | unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); |
2412 | unsigned long stripe_width = | 2412 | unsigned long stripe_width = |
2413 | le32_to_cpu(sbi->s_es->s_raid_stripe_width); | 2413 | le32_to_cpu(sbi->s_es->s_raid_stripe_width); |
2414 | int ret; | 2414 | int ret; |
2415 | 2415 | ||
2416 | if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) | 2416 | if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) |
2417 | ret = sbi->s_stripe; | 2417 | ret = sbi->s_stripe; |
2418 | else if (stripe_width <= sbi->s_blocks_per_group) | 2418 | else if (stripe_width <= sbi->s_blocks_per_group) |
2419 | ret = stripe_width; | 2419 | ret = stripe_width; |
2420 | else if (stride <= sbi->s_blocks_per_group) | 2420 | else if (stride <= sbi->s_blocks_per_group) |
2421 | ret = stride; | 2421 | ret = stride; |
2422 | else | 2422 | else |
2423 | ret = 0; | 2423 | ret = 0; |
2424 | 2424 | ||
2425 | /* | 2425 | /* |
2426 | * If the stripe width is 1, this makes no sense and | 2426 | * If the stripe width is 1, this makes no sense and |
2427 | * we set it to 0 to turn off stripe handling code. | 2427 | * we set it to 0 to turn off stripe handling code. |
2428 | */ | 2428 | */ |
2429 | if (ret <= 1) | 2429 | if (ret <= 1) |
2430 | ret = 0; | 2430 | ret = 0; |
2431 | 2431 | ||
2432 | return ret; | 2432 | return ret; |
2433 | } | 2433 | } |
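For context, the two superblock fields consulted above are normally populated at mkfs time, and the fall-through order in the code gives the mount option priority:

    /*
     * Precedence, as implemented above (field sources assumed):
     *   1. sbi->s_stripe            mount -o stripe=<n>
     *   2. s_raid_stripe_width      mke2fs -E stripe-width=<n>
     *   3. s_raid_stride            mke2fs -E stride=<n>
     *   4. 0                        striping disabled
     * A result of 1 is also collapsed to 0, since a one-block stripe
     * would make the striped-allocation heuristics pointless.
     */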
2434 | 2434 | ||
2435 | /* sysfs support */ | 2435 | /* sysfs support */ |
2436 | 2436 | ||
2437 | struct ext4_attr { | 2437 | struct ext4_attr { |
2438 | struct attribute attr; | 2438 | struct attribute attr; |
2439 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2439 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
2440 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2440 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
2441 | const char *, size_t); | 2441 | const char *, size_t); |
2442 | union { | 2442 | union { |
2443 | int offset; | 2443 | int offset; |
2444 | int deprecated_val; | 2444 | int deprecated_val; |
2445 | } u; | 2445 | } u; |
2446 | }; | 2446 | }; |
2447 | 2447 | ||
2448 | static int parse_strtoull(const char *buf, | 2448 | static int parse_strtoull(const char *buf, |
2449 | unsigned long long max, unsigned long long *value) | 2449 | unsigned long long max, unsigned long long *value) |
2450 | { | 2450 | { |
2451 | int ret; | 2451 | int ret; |
2452 | 2452 | ||
2453 | ret = kstrtoull(skip_spaces(buf), 0, value); | 2453 | ret = kstrtoull(skip_spaces(buf), 0, value); |
2454 | if (!ret && *value > max) | 2454 | if (!ret && *value > max) |
2455 | ret = -EINVAL; | 2455 | ret = -EINVAL; |
2456 | return ret; | 2456 | return ret; |
2457 | } | 2457 | } |
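One detail worth noting: kstrtoull() is called with base 0, so it auto-detects the radix of the written string. An illustrative comment (the reserved_clusters attribute defined below is the actual caller of parse_strtoull()):

    /*
     * All three writes store the same value (256):
     *   echo 256   > /sys/fs/ext4/<disk>/reserved_clusters
     *   echo 0x100 > /sys/fs/ext4/<disk>/reserved_clusters
     *   echo 0400  > /sys/fs/ext4/<disk>/reserved_clusters
     */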
2458 | 2458 | ||
2459 | static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, | 2459 | static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, |
2460 | struct ext4_sb_info *sbi, | 2460 | struct ext4_sb_info *sbi, |
2461 | char *buf) | 2461 | char *buf) |
2462 | { | 2462 | { |
2463 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2463 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2464 | (s64) EXT4_C2B(sbi, | 2464 | (s64) EXT4_C2B(sbi, |
2465 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | 2465 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); |
2466 | } | 2466 | } |
2467 | 2467 | ||
2468 | static ssize_t session_write_kbytes_show(struct ext4_attr *a, | 2468 | static ssize_t session_write_kbytes_show(struct ext4_attr *a, |
2469 | struct ext4_sb_info *sbi, char *buf) | 2469 | struct ext4_sb_info *sbi, char *buf) |
2470 | { | 2470 | { |
2471 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2471 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2472 | 2472 | ||
2473 | if (!sb->s_bdev->bd_part) | 2473 | if (!sb->s_bdev->bd_part) |
2474 | return snprintf(buf, PAGE_SIZE, "0\n"); | 2474 | return snprintf(buf, PAGE_SIZE, "0\n"); |
2475 | return snprintf(buf, PAGE_SIZE, "%lu\n", | 2475 | return snprintf(buf, PAGE_SIZE, "%lu\n", |
2476 | (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2476 | (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
2477 | sbi->s_sectors_written_start) >> 1); | 2477 | sbi->s_sectors_written_start) >> 1); |
2478 | } | 2478 | } |
2479 | 2479 | ||
2480 | static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | 2480 | static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, |
2481 | struct ext4_sb_info *sbi, char *buf) | 2481 | struct ext4_sb_info *sbi, char *buf) |
2482 | { | 2482 | { |
2483 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2483 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2484 | 2484 | ||
2485 | if (!sb->s_bdev->bd_part) | 2485 | if (!sb->s_bdev->bd_part) |
2486 | return snprintf(buf, PAGE_SIZE, "0\n"); | 2486 | return snprintf(buf, PAGE_SIZE, "0\n"); |
2487 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2487 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2488 | (unsigned long long)(sbi->s_kbytes_written + | 2488 | (unsigned long long)(sbi->s_kbytes_written + |
2489 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2489 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
2490 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); | 2490 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); |
2491 | } | 2491 | } |
2492 | 2492 | ||
2493 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | 2493 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, |
2494 | struct ext4_sb_info *sbi, | 2494 | struct ext4_sb_info *sbi, |
2495 | const char *buf, size_t count) | 2495 | const char *buf, size_t count) |
2496 | { | 2496 | { |
2497 | unsigned long t; | 2497 | unsigned long t; |
2498 | int ret; | 2498 | int ret; |
2499 | 2499 | ||
2500 | ret = kstrtoul(skip_spaces(buf), 0, &t); | 2500 | ret = kstrtoul(skip_spaces(buf), 0, &t); |
2501 | if (ret) | 2501 | if (ret) |
2502 | return ret; | 2502 | return ret; |
2503 | 2503 | ||
2504 | if (t && (!is_power_of_2(t) || t > 0x40000000)) | 2504 | if (t && (!is_power_of_2(t) || t > 0x40000000)) |
2505 | return -EINVAL; | 2505 | return -EINVAL; |
2506 | 2506 | ||
2507 | sbi->s_inode_readahead_blks = t; | 2507 | sbi->s_inode_readahead_blks = t; |
2508 | return count; | 2508 | return count; |
2509 | } | 2509 | } |
2510 | 2510 | ||
2511 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2511 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
2512 | struct ext4_sb_info *sbi, char *buf) | 2512 | struct ext4_sb_info *sbi, char *buf) |
2513 | { | 2513 | { |
2514 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); | 2514 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2515 | 2515 | ||
2516 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | 2516 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); |
2517 | } | 2517 | } |
2518 | 2518 | ||
2519 | static ssize_t sbi_ui_store(struct ext4_attr *a, | 2519 | static ssize_t sbi_ui_store(struct ext4_attr *a, |
2520 | struct ext4_sb_info *sbi, | 2520 | struct ext4_sb_info *sbi, |
2521 | const char *buf, size_t count) | 2521 | const char *buf, size_t count) |
2522 | { | 2522 | { |
2523 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); | 2523 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
2524 | unsigned long t; | 2524 | unsigned long t; |
2525 | int ret; | 2525 | int ret; |
2526 | 2526 | ||
2527 | ret = kstrtoul(skip_spaces(buf), 0, &t); | 2527 | ret = kstrtoul(skip_spaces(buf), 0, &t); |
2528 | if (ret) | 2528 | if (ret) |
2529 | return ret; | 2529 | return ret; |
2530 | *ui = t; | 2530 | *ui = t; |
2531 | return count; | 2531 | return count; |
2532 | } | 2532 | } |
2533 | 2533 | ||
2534 | static ssize_t es_ui_show(struct ext4_attr *a, | 2534 | static ssize_t es_ui_show(struct ext4_attr *a, |
2535 | struct ext4_sb_info *sbi, char *buf) | 2535 | struct ext4_sb_info *sbi, char *buf) |
2536 | { | 2536 | { |
2537 | 2537 | ||
2538 | unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + | 2538 | unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + |
2539 | a->u.offset); | 2539 | a->u.offset); |
2540 | 2540 | ||
2541 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | 2541 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); |
2542 | } | 2542 | } |
2543 | 2543 | ||
2544 | static ssize_t reserved_clusters_show(struct ext4_attr *a, | 2544 | static ssize_t reserved_clusters_show(struct ext4_attr *a, |
2545 | struct ext4_sb_info *sbi, char *buf) | 2545 | struct ext4_sb_info *sbi, char *buf) |
2546 | { | 2546 | { |
2547 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2547 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2548 | (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); | 2548 | (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); |
2549 | } | 2549 | } |
2550 | 2550 | ||
2551 | static ssize_t reserved_clusters_store(struct ext4_attr *a, | 2551 | static ssize_t reserved_clusters_store(struct ext4_attr *a, |
2552 | struct ext4_sb_info *sbi, | 2552 | struct ext4_sb_info *sbi, |
2553 | const char *buf, size_t count) | 2553 | const char *buf, size_t count) |
2554 | { | 2554 | { |
2555 | unsigned long long val; | 2555 | unsigned long long val; |
2556 | int ret; | 2556 | int ret; |
2557 | 2557 | ||
2558 | if (parse_strtoull(buf, -1ULL, &val)) | 2558 | if (parse_strtoull(buf, -1ULL, &val)) |
2559 | return -EINVAL; | 2559 | return -EINVAL; |
2560 | ret = ext4_reserve_clusters(sbi, val); | 2560 | ret = ext4_reserve_clusters(sbi, val); |
2561 | 2561 | ||
2562 | return ret ? ret : count; | 2562 | return ret ? ret : count; |
2563 | } | 2563 | } |
2564 | 2564 | ||
2565 | static ssize_t trigger_test_error(struct ext4_attr *a, | 2565 | static ssize_t trigger_test_error(struct ext4_attr *a, |
2566 | struct ext4_sb_info *sbi, | 2566 | struct ext4_sb_info *sbi, |
2567 | const char *buf, size_t count) | 2567 | const char *buf, size_t count) |
2568 | { | 2568 | { |
2569 | int len = count; | 2569 | int len = count; |
2570 | 2570 | ||
2571 | if (!capable(CAP_SYS_ADMIN)) | 2571 | if (!capable(CAP_SYS_ADMIN)) |
2572 | return -EPERM; | 2572 | return -EPERM; |
2573 | 2573 | ||
2574 | if (len && buf[len-1] == '\n') | 2574 | if (len && buf[len-1] == '\n') |
2575 | len--; | 2575 | len--; |
2576 | 2576 | ||
2577 | if (len) | 2577 | if (len) |
2578 | ext4_error(sbi->s_sb, "%.*s", len, buf); | 2578 | ext4_error(sbi->s_sb, "%.*s", len, buf); |
2579 | return count; | 2579 | return count; |
2580 | } | 2580 | } |
2581 | 2581 | ||
2582 | static ssize_t sbi_deprecated_show(struct ext4_attr *a, | 2582 | static ssize_t sbi_deprecated_show(struct ext4_attr *a, |
2583 | struct ext4_sb_info *sbi, char *buf) | 2583 | struct ext4_sb_info *sbi, char *buf) |
2584 | { | 2584 | { |
2585 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); | 2585 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); |
2586 | } | 2586 | } |
2587 | 2587 | ||
2588 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ | 2588 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ |
2589 | static struct ext4_attr ext4_attr_##_name = { \ | 2589 | static struct ext4_attr ext4_attr_##_name = { \ |
2590 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2590 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
2591 | .show = _show, \ | 2591 | .show = _show, \ |
2592 | .store = _store, \ | 2592 | .store = _store, \ |
2593 | .u = { \ | 2593 | .u = { \ |
2594 | .offset = offsetof(struct ext4_sb_info, _elname),\ | 2594 | .offset = offsetof(struct ext4_sb_info, _elname),\ |
2595 | }, \ | 2595 | }, \ |
2596 | } | 2596 | } |
2597 | 2597 | ||
2598 | #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ | 2598 | #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ |
2599 | static struct ext4_attr ext4_attr_##_name = { \ | 2599 | static struct ext4_attr ext4_attr_##_name = { \ |
2600 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2600 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
2601 | .show = _show, \ | 2601 | .show = _show, \ |
2602 | .store = _store, \ | 2602 | .store = _store, \ |
2603 | .u = { \ | 2603 | .u = { \ |
2604 | .offset = offsetof(struct ext4_super_block, _elname), \ | 2604 | .offset = offsetof(struct ext4_super_block, _elname), \ |
2605 | }, \ | 2605 | }, \ |
2606 | } | 2606 | } |
2607 | 2607 | ||
2608 | #define EXT4_ATTR(name, mode, show, store) \ | 2608 | #define EXT4_ATTR(name, mode, show, store) \ |
2609 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2609 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
2610 | 2610 | ||
2611 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) | 2611 | #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) |
2612 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) | 2612 | #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) |
2613 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) | 2613 | #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) |
2614 | 2614 | ||
2615 | #define EXT4_RO_ATTR_ES_UI(name, elname) \ | 2615 | #define EXT4_RO_ATTR_ES_UI(name, elname) \ |
2616 | EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) | 2616 | EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) |
2617 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2617 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
2618 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2618 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
2619 | 2619 | ||
2620 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2620 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
2621 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | 2621 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ |
2622 | static struct ext4_attr ext4_attr_##_name = { \ | 2622 | static struct ext4_attr ext4_attr_##_name = { \ |
2623 | .attr = {.name = __stringify(_name), .mode = 0444 }, \ | 2623 | .attr = {.name = __stringify(_name), .mode = 0444 }, \ |
2624 | .show = sbi_deprecated_show, \ | 2624 | .show = sbi_deprecated_show, \ |
2625 | .u = { \ | 2625 | .u = { \ |
2626 | .deprecated_val = _val, \ | 2626 | .deprecated_val = _val, \ |
2627 | }, \ | 2627 | }, \ |
2628 | } | 2628 | } |
2629 | 2629 | ||
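Since the attribute tables below are built entirely from these macros, a hand-expanded example may help; EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal) from the list that follows expands to roughly:

    static struct ext4_attr ext4_attr_inode_goal = {
            .attr  = { .name = "inode_goal", .mode = 0644 },
            .show  = sbi_ui_show,
            .store = sbi_ui_store,
            .u     = { .offset = offsetof(struct ext4_sb_info, s_inode_goal) },
    };

so sbi_ui_show/sbi_ui_store can locate the unsigned int inside struct ext4_sb_info purely from the stored offset.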
2630 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2630 | EXT4_RO_ATTR(delayed_allocation_blocks); |
2631 | EXT4_RO_ATTR(session_write_kbytes); | 2631 | EXT4_RO_ATTR(session_write_kbytes); |
2632 | EXT4_RO_ATTR(lifetime_write_kbytes); | 2632 | EXT4_RO_ATTR(lifetime_write_kbytes); |
2633 | EXT4_RW_ATTR(reserved_clusters); | 2633 | EXT4_RW_ATTR(reserved_clusters); |
2634 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, | 2634 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, |
2635 | inode_readahead_blks_store, s_inode_readahead_blks); | 2635 | inode_readahead_blks_store, s_inode_readahead_blks); |
2636 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); | 2636 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); |
2637 | EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); | 2637 | EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); |
2638 | EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); | 2638 | EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); |
2639 | EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | 2639 | EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); |
2640 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | 2640 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); |
2641 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2641 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2642 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2642 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2643 | EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); | 2643 | EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); |
2644 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | 2644 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); |
2645 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2645 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
2646 | EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); | 2646 | EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); |
2647 | EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); | 2647 | EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); |
2648 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); | 2648 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); |
2649 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); | 2649 | EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); |
2650 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); | 2650 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); |
2651 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); | 2651 | EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); |
2652 | EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); | 2652 | EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); |
2653 | EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); | 2653 | EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); |
2654 | EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); | 2654 | EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); |
2655 | 2655 | ||
2656 | static struct attribute *ext4_attrs[] = { | 2656 | static struct attribute *ext4_attrs[] = { |
2657 | ATTR_LIST(delayed_allocation_blocks), | 2657 | ATTR_LIST(delayed_allocation_blocks), |
2658 | ATTR_LIST(session_write_kbytes), | 2658 | ATTR_LIST(session_write_kbytes), |
2659 | ATTR_LIST(lifetime_write_kbytes), | 2659 | ATTR_LIST(lifetime_write_kbytes), |
2660 | ATTR_LIST(reserved_clusters), | 2660 | ATTR_LIST(reserved_clusters), |
2661 | ATTR_LIST(inode_readahead_blks), | 2661 | ATTR_LIST(inode_readahead_blks), |
2662 | ATTR_LIST(inode_goal), | 2662 | ATTR_LIST(inode_goal), |
2663 | ATTR_LIST(mb_stats), | 2663 | ATTR_LIST(mb_stats), |
2664 | ATTR_LIST(mb_max_to_scan), | 2664 | ATTR_LIST(mb_max_to_scan), |
2665 | ATTR_LIST(mb_min_to_scan), | 2665 | ATTR_LIST(mb_min_to_scan), |
2666 | ATTR_LIST(mb_order2_req), | 2666 | ATTR_LIST(mb_order2_req), |
2667 | ATTR_LIST(mb_stream_req), | 2667 | ATTR_LIST(mb_stream_req), |
2668 | ATTR_LIST(mb_group_prealloc), | 2668 | ATTR_LIST(mb_group_prealloc), |
2669 | ATTR_LIST(max_writeback_mb_bump), | 2669 | ATTR_LIST(max_writeback_mb_bump), |
2670 | ATTR_LIST(extent_max_zeroout_kb), | 2670 | ATTR_LIST(extent_max_zeroout_kb), |
2671 | ATTR_LIST(trigger_fs_error), | 2671 | ATTR_LIST(trigger_fs_error), |
2672 | ATTR_LIST(err_ratelimit_interval_ms), | 2672 | ATTR_LIST(err_ratelimit_interval_ms), |
2673 | ATTR_LIST(err_ratelimit_burst), | 2673 | ATTR_LIST(err_ratelimit_burst), |
2674 | ATTR_LIST(warning_ratelimit_interval_ms), | 2674 | ATTR_LIST(warning_ratelimit_interval_ms), |
2675 | ATTR_LIST(warning_ratelimit_burst), | 2675 | ATTR_LIST(warning_ratelimit_burst), |
2676 | ATTR_LIST(msg_ratelimit_interval_ms), | 2676 | ATTR_LIST(msg_ratelimit_interval_ms), |
2677 | ATTR_LIST(msg_ratelimit_burst), | 2677 | ATTR_LIST(msg_ratelimit_burst), |
2678 | ATTR_LIST(errors_count), | 2678 | ATTR_LIST(errors_count), |
2679 | ATTR_LIST(first_error_time), | 2679 | ATTR_LIST(first_error_time), |
2680 | ATTR_LIST(last_error_time), | 2680 | ATTR_LIST(last_error_time), |
2681 | NULL, | 2681 | NULL, |
2682 | }; | 2682 | }; |
2683 | 2683 | ||
2684 | /* Features this copy of ext4 supports */ | 2684 | /* Features this copy of ext4 supports */ |
2685 | EXT4_INFO_ATTR(lazy_itable_init); | 2685 | EXT4_INFO_ATTR(lazy_itable_init); |
2686 | EXT4_INFO_ATTR(batched_discard); | 2686 | EXT4_INFO_ATTR(batched_discard); |
2687 | EXT4_INFO_ATTR(meta_bg_resize); | 2687 | EXT4_INFO_ATTR(meta_bg_resize); |
2688 | 2688 | ||
2689 | static struct attribute *ext4_feat_attrs[] = { | 2689 | static struct attribute *ext4_feat_attrs[] = { |
2690 | ATTR_LIST(lazy_itable_init), | 2690 | ATTR_LIST(lazy_itable_init), |
2691 | ATTR_LIST(batched_discard), | 2691 | ATTR_LIST(batched_discard), |
2692 | ATTR_LIST(meta_bg_resize), | 2692 | ATTR_LIST(meta_bg_resize), |
2693 | NULL, | 2693 | NULL, |
2694 | }; | 2694 | }; |
2695 | 2695 | ||
2696 | static ssize_t ext4_attr_show(struct kobject *kobj, | 2696 | static ssize_t ext4_attr_show(struct kobject *kobj, |
2697 | struct attribute *attr, char *buf) | 2697 | struct attribute *attr, char *buf) |
2698 | { | 2698 | { |
2699 | struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, | 2699 | struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, |
2700 | s_kobj); | 2700 | s_kobj); |
2701 | struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); | 2701 | struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); |
2702 | 2702 | ||
2703 | return a->show ? a->show(a, sbi, buf) : 0; | 2703 | return a->show ? a->show(a, sbi, buf) : 0; |
2704 | } | 2704 | } |
2705 | 2705 | ||
2706 | static ssize_t ext4_attr_store(struct kobject *kobj, | 2706 | static ssize_t ext4_attr_store(struct kobject *kobj, |
2707 | struct attribute *attr, | 2707 | struct attribute *attr, |
2708 | const char *buf, size_t len) | 2708 | const char *buf, size_t len) |
2709 | { | 2709 | { |
2710 | struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, | 2710 | struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, |
2711 | s_kobj); | 2711 | s_kobj); |
2712 | struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); | 2712 | struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); |
2713 | 2713 | ||
2714 | return a->store ? a->store(a, sbi, buf, len) : 0; | 2714 | return a->store ? a->store(a, sbi, buf, len) : 0; |
2715 | } | 2715 | } |
2716 | 2716 | ||
2717 | static void ext4_sb_release(struct kobject *kobj) | 2717 | static void ext4_sb_release(struct kobject *kobj) |
2718 | { | 2718 | { |
2719 | struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, | 2719 | struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, |
2720 | s_kobj); | 2720 | s_kobj); |
2721 | complete(&sbi->s_kobj_unregister); | 2721 | complete(&sbi->s_kobj_unregister); |
2722 | } | 2722 | } |
2723 | 2723 | ||
2724 | static const struct sysfs_ops ext4_attr_ops = { | 2724 | static const struct sysfs_ops ext4_attr_ops = { |
2725 | .show = ext4_attr_show, | 2725 | .show = ext4_attr_show, |
2726 | .store = ext4_attr_store, | 2726 | .store = ext4_attr_store, |
2727 | }; | 2727 | }; |
2728 | 2728 | ||
2729 | static struct kobj_type ext4_ktype = { | 2729 | static struct kobj_type ext4_ktype = { |
2730 | .default_attrs = ext4_attrs, | 2730 | .default_attrs = ext4_attrs, |
2731 | .sysfs_ops = &ext4_attr_ops, | 2731 | .sysfs_ops = &ext4_attr_ops, |
2732 | .release = ext4_sb_release, | 2732 | .release = ext4_sb_release, |
2733 | }; | 2733 | }; |
2734 | 2734 | ||
2735 | static void ext4_feat_release(struct kobject *kobj) | 2735 | static void ext4_feat_release(struct kobject *kobj) |
2736 | { | 2736 | { |
2737 | complete(&ext4_feat->f_kobj_unregister); | 2737 | complete(&ext4_feat->f_kobj_unregister); |
2738 | } | 2738 | } |
2739 | 2739 | ||
2740 | static ssize_t ext4_feat_show(struct kobject *kobj, | 2740 | static ssize_t ext4_feat_show(struct kobject *kobj, |
2741 | struct attribute *attr, char *buf) | 2741 | struct attribute *attr, char *buf) |
2742 | { | 2742 | { |
2743 | return snprintf(buf, PAGE_SIZE, "supported\n"); | 2743 | return snprintf(buf, PAGE_SIZE, "supported\n"); |
2744 | } | 2744 | } |
2745 | 2745 | ||
2746 | /* | 2746 | /* |
2747 | * We cannot use ext4_attr_show/store because they rely on the kobject | 2747 | * We cannot use ext4_attr_show/store because they rely on the kobject |
2748 | * being embedded in the ext4_sb_info structure which is definitely not | 2748 | * being embedded in the ext4_sb_info structure which is definitely not |
2749 | * true in this case. | 2749 | * true in this case. |
2750 | */ | 2750 | */ |
2751 | static const struct sysfs_ops ext4_feat_ops = { | 2751 | static const struct sysfs_ops ext4_feat_ops = { |
2752 | .show = ext4_feat_show, | 2752 | .show = ext4_feat_show, |
2753 | .store = NULL, | 2753 | .store = NULL, |
2754 | }; | 2754 | }; |
2755 | 2755 | ||
2756 | static struct kobj_type ext4_feat_ktype = { | 2756 | static struct kobj_type ext4_feat_ktype = { |
2757 | .default_attrs = ext4_feat_attrs, | 2757 | .default_attrs = ext4_feat_attrs, |
2758 | .sysfs_ops = &ext4_feat_ops, | 2758 | .sysfs_ops = &ext4_feat_ops, |
2759 | .release = ext4_feat_release, | 2759 | .release = ext4_feat_release, |
2760 | }; | 2760 | }; |
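Taken together, the two kobj_types wire these attributes into sysfs in two places. A sketch of the resulting layout (paths assumed from where ext4 registers its kobjects):

    /*
     * /sys/fs/ext4/features/lazy_itable_init    any read returns
     * /sys/fs/ext4/features/batched_discard     "supported" (ext4_feat_show)
     * /sys/fs/ext4/<disk>/inode_readahead_blks  per-mount tunables backed
     * /sys/fs/ext4/<disk>/reserved_clusters     by ext4_attr_show/store
     */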
2761 | 2761 | ||
2762 | /* | 2762 | /* |
2763 | * Check whether this filesystem can be mounted based on | 2763 | * Check whether this filesystem can be mounted based on |
2764 | * the features present and the RDONLY/RDWR mount requested. | 2764 | * the features present and the RDONLY/RDWR mount requested. |
2765 | * Returns 1 if this filesystem can be mounted as requested, | 2765 | * Returns 1 if this filesystem can be mounted as requested, |
2766 | * 0 if it cannot be. | 2766 | * 0 if it cannot be. |
2767 | */ | 2767 | */ |
2768 | static int ext4_feature_set_ok(struct super_block *sb, int readonly) | 2768 | static int ext4_feature_set_ok(struct super_block *sb, int readonly) |
2769 | { | 2769 | { |
2770 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { | 2770 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { |
2771 | ext4_msg(sb, KERN_ERR, | 2771 | ext4_msg(sb, KERN_ERR, |
2772 | "Couldn't mount because of " | 2772 | "Couldn't mount because of " |
2773 | "unsupported optional features (%x)", | 2773 | "unsupported optional features (%x)", |
2774 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & | 2774 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & |
2775 | ~EXT4_FEATURE_INCOMPAT_SUPP)); | 2775 | ~EXT4_FEATURE_INCOMPAT_SUPP)); |
2776 | return 0; | 2776 | return 0; |
2777 | } | 2777 | } |
2778 | 2778 | ||
2779 | if (readonly) | 2779 | if (readonly) |
2780 | return 1; | 2780 | return 1; |
2781 | 2781 | ||
2782 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_READONLY)) { | ||
2783 | ext4_msg(sb, KERN_INFO, "filesystem is read-only"); | ||
2784 | sb->s_flags |= MS_RDONLY; | ||
2785 | return 1; | ||
2786 | } | ||
2787 | |||
2782 | /* Check that feature set is OK for a read-write mount */ | 2788 | /* Check that feature set is OK for a read-write mount */ |
2783 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { | 2789 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { |
2784 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " | 2790 | ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " |
2785 | "unsupported optional features (%x)", | 2791 | "unsupported optional features (%x)", |
2786 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & | 2792 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & |
2787 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); | 2793 | ~EXT4_FEATURE_RO_COMPAT_SUPP)); |
2788 | return 0; | 2794 | return 0; |
2789 | } | 2795 | } |
2790 | /* | 2796 | /* |
2791 | * Large file size enabled file system can only be mounted | 2797 | * Large file size enabled file system can only be mounted |
2792 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF | 2798 | * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF |
2793 | */ | 2799 | */ |
2794 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 2800 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { |
2795 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 2801 | if (sizeof(blkcnt_t) < sizeof(u64)) { |
2796 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " | 2802 | ext4_msg(sb, KERN_ERR, "Filesystem with huge files " |
2797 | "cannot be mounted RDWR without " | 2803 | "cannot be mounted RDWR without " |
2798 | "CONFIG_LBDAF"); | 2804 | "CONFIG_LBDAF"); |
2799 | return 0; | 2805 | return 0; |
2800 | } | 2806 | } |
2801 | } | 2807 | } |
2802 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && | 2808 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && |
2803 | !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | 2809 | !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { |
2804 | ext4_msg(sb, KERN_ERR, | 2810 | ext4_msg(sb, KERN_ERR, |
2805 | "Can't support bigalloc feature without " | 2811 | "Can't support bigalloc feature without " |
2806 | "extents feature\n"); | 2812 | "extents feature\n"); |
2807 | return 0; | 2813 | return 0; |
2808 | } | 2814 | } |
2809 | 2815 | ||
2810 | #ifndef CONFIG_QUOTA | 2816 | #ifndef CONFIG_QUOTA |
2811 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && | 2817 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && |
2812 | !readonly) { | 2818 | !readonly) { |
2813 | ext4_msg(sb, KERN_ERR, | 2819 | ext4_msg(sb, KERN_ERR, |
2814 | "Filesystem with quota feature cannot be mounted RDWR " | 2820 | "Filesystem with quota feature cannot be mounted RDWR " |
2815 | "without CONFIG_QUOTA"); | 2821 | "without CONFIG_QUOTA"); |
2816 | return 0; | 2822 | return 0; |
2817 | } | 2823 | } |
2818 | #endif /* CONFIG_QUOTA */ | 2824 | #endif /* CONFIG_QUOTA */ |
2819 | return 1; | 2825 | return 1; |
2820 | } | 2826 | } |
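The hunk added above is the mechanics of the read-only image feature: when the READONLY ro-compat bit is set, the mount is not refused, it is quietly downgraded. Illustrative behavior, assuming an image with that bit set:

    /*
     * mount -o rw /dev/sdb1 /mnt   succeeds, but MS_RDONLY is forced on,
     *                              so the mount behaves as if ro was given
     * mount -o ro /dev/sdb1 /mnt   unaffected; the new check sits after
     *                              the early 'if (readonly)' return
     */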
2821 | 2827 | ||
2822 | /* | 2828 | /* |
2823 | * This function is called once a day if we have errors logged | 2829 | * This function is called once a day if we have errors logged |
2824 | * on the file system | 2830 | * on the file system |
2825 | */ | 2831 | */ |
2826 | static void print_daily_error_info(unsigned long arg) | 2832 | static void print_daily_error_info(unsigned long arg) |
2827 | { | 2833 | { |
2828 | struct super_block *sb = (struct super_block *) arg; | 2834 | struct super_block *sb = (struct super_block *) arg; |
2829 | struct ext4_sb_info *sbi; | 2835 | struct ext4_sb_info *sbi; |
2830 | struct ext4_super_block *es; | 2836 | struct ext4_super_block *es; |
2831 | 2837 | ||
2832 | sbi = EXT4_SB(sb); | 2838 | sbi = EXT4_SB(sb); |
2833 | es = sbi->s_es; | 2839 | es = sbi->s_es; |
2834 | 2840 | ||
2835 | if (es->s_error_count) | 2841 | if (es->s_error_count) |
2836 | /* fsck newer than v1.41.13 is needed to clean this condition. */ | 2842 | /* fsck newer than v1.41.13 is needed to clean this condition. */ |
2837 | ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", | 2843 | ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", |
2838 | le32_to_cpu(es->s_error_count)); | 2844 | le32_to_cpu(es->s_error_count)); |
2839 | if (es->s_first_error_time) { | 2845 | if (es->s_first_error_time) { |
2840 | printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", | 2846 | printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", |
2841 | sb->s_id, le32_to_cpu(es->s_first_error_time), | 2847 | sb->s_id, le32_to_cpu(es->s_first_error_time), |
2842 | (int) sizeof(es->s_first_error_func), | 2848 | (int) sizeof(es->s_first_error_func), |
2843 | es->s_first_error_func, | 2849 | es->s_first_error_func, |
2844 | le32_to_cpu(es->s_first_error_line)); | 2850 | le32_to_cpu(es->s_first_error_line)); |
2845 | if (es->s_first_error_ino) | 2851 | if (es->s_first_error_ino) |
2846 | printk(": inode %u", | 2852 | printk(": inode %u", |
2847 | le32_to_cpu(es->s_first_error_ino)); | 2853 | le32_to_cpu(es->s_first_error_ino)); |
2848 | if (es->s_first_error_block) | 2854 | if (es->s_first_error_block) |
2849 | printk(": block %llu", (unsigned long long) | 2855 | printk(": block %llu", (unsigned long long) |
2850 | le64_to_cpu(es->s_first_error_block)); | 2856 | le64_to_cpu(es->s_first_error_block)); |
2851 | printk("\n"); | 2857 | printk("\n"); |
2852 | } | 2858 | } |
2853 | if (es->s_last_error_time) { | 2859 | if (es->s_last_error_time) { |
2854 | printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", | 2860 | printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", |
2855 | sb->s_id, le32_to_cpu(es->s_last_error_time), | 2861 | sb->s_id, le32_to_cpu(es->s_last_error_time), |
2856 | (int) sizeof(es->s_last_error_func), | 2862 | (int) sizeof(es->s_last_error_func), |
2857 | es->s_last_error_func, | 2863 | es->s_last_error_func, |
2858 | le32_to_cpu(es->s_last_error_line)); | 2864 | le32_to_cpu(es->s_last_error_line)); |
2859 | if (es->s_last_error_ino) | 2865 | if (es->s_last_error_ino) |
2860 | printk(": inode %u", | 2866 | printk(": inode %u", |
2861 | le32_to_cpu(es->s_last_error_ino)); | 2867 | le32_to_cpu(es->s_last_error_ino)); |
2862 | if (es->s_last_error_block) | 2868 | if (es->s_last_error_block) |
2863 | printk(": block %llu", (unsigned long long) | 2869 | printk(": block %llu", (unsigned long long) |
2864 | le64_to_cpu(es->s_last_error_block)); | 2870 | le64_to_cpu(es->s_last_error_block)); |
2865 | printk("\n"); | 2871 | printk("\n"); |
2866 | } | 2872 | } |
2867 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ | 2873 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ |
2868 | } | 2874 | } |
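print_daily_error_info() is the callback for the sbi->s_err_report timer; the arming happens in ext4_fill_super(), outside this hunk. A sketch of what that call site presumably looks like with the standard setup_timer() helper:

    /* Assumed call-site shape, not part of this hunk: */
    setup_timer(&sbi->s_err_report, print_daily_error_info,
                (unsigned long)sb);
    if (es->s_error_count)
            mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);

After the first run, the mod_timer() at the end of the function above keeps it firing once a day for as long as the filesystem stays mounted.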
2869 | 2875 | ||
2870 | /* Find next suitable group and run ext4_init_inode_table */ | 2876 | /* Find next suitable group and run ext4_init_inode_table */ |
2871 | static int ext4_run_li_request(struct ext4_li_request *elr) | 2877 | static int ext4_run_li_request(struct ext4_li_request *elr) |
2872 | { | 2878 | { |
2873 | struct ext4_group_desc *gdp = NULL; | 2879 | struct ext4_group_desc *gdp = NULL; |
2874 | ext4_group_t group, ngroups; | 2880 | ext4_group_t group, ngroups; |
2875 | struct super_block *sb; | 2881 | struct super_block *sb; |
2876 | unsigned long timeout = 0; | 2882 | unsigned long timeout = 0; |
2877 | int ret = 0; | 2883 | int ret = 0; |
2878 | 2884 | ||
2879 | sb = elr->lr_super; | 2885 | sb = elr->lr_super; |
2880 | ngroups = EXT4_SB(sb)->s_groups_count; | 2886 | ngroups = EXT4_SB(sb)->s_groups_count; |
2881 | 2887 | ||
2882 | sb_start_write(sb); | 2888 | sb_start_write(sb); |
2883 | for (group = elr->lr_next_group; group < ngroups; group++) { | 2889 | for (group = elr->lr_next_group; group < ngroups; group++) { |
2884 | gdp = ext4_get_group_desc(sb, group, NULL); | 2890 | gdp = ext4_get_group_desc(sb, group, NULL); |
2885 | if (!gdp) { | 2891 | if (!gdp) { |
2886 | ret = 1; | 2892 | ret = 1; |
2887 | break; | 2893 | break; |
2888 | } | 2894 | } |
2889 | 2895 | ||
2890 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | 2896 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) |
2891 | break; | 2897 | break; |
2892 | } | 2898 | } |
2893 | 2899 | ||
2894 | if (group >= ngroups) | 2900 | if (group >= ngroups) |
2895 | ret = 1; | 2901 | ret = 1; |
2896 | 2902 | ||
2897 | if (!ret) { | 2903 | if (!ret) { |
2898 | timeout = jiffies; | 2904 | timeout = jiffies; |
2899 | ret = ext4_init_inode_table(sb, group, | 2905 | ret = ext4_init_inode_table(sb, group, |
2900 | elr->lr_timeout ? 0 : 1); | 2906 | elr->lr_timeout ? 0 : 1); |
2901 | if (elr->lr_timeout == 0) { | 2907 | if (elr->lr_timeout == 0) { |
2902 | timeout = (jiffies - timeout) * | 2908 | timeout = (jiffies - timeout) * |
2903 | elr->lr_sbi->s_li_wait_mult; | 2909 | elr->lr_sbi->s_li_wait_mult; |
2904 | elr->lr_timeout = timeout; | 2910 | elr->lr_timeout = timeout; |
2905 | } | 2911 | } |
2906 | elr->lr_next_sched = jiffies + elr->lr_timeout; | 2912 | elr->lr_next_sched = jiffies + elr->lr_timeout; |
2907 | elr->lr_next_group = group + 1; | 2913 | elr->lr_next_group = group + 1; |
2908 | } | 2914 | } |
2909 | sb_end_write(sb); | 2915 | sb_end_write(sb); |
2910 | 2916 | ||
2911 | return ret; | 2917 | return ret; |
2912 | } | 2918 | } |
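The pacing logic above deserves a number. On the first pass lr_timeout is 0, so the time one group took is measured and multiplied by s_li_wait_mult:

    /*
     * Example (EXT4_DEF_LI_WAIT_MULT assumed to be 10): if zeroing one
     * group's inode tables took 100 ms, lr_timeout becomes ~1 s, so the
     * lazyinit thread spends roughly a tenth of wall time doing I/O.
     */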
2913 | 2919 | ||
2914 | /* | 2920 | /* |
2915 | * Remove lr_request from the request list and free the | 2921 | * Remove lr_request from the request list and free the |
2916 | * request structure. Should be called with li_list_mtx held. | 2922 | * request structure. Should be called with li_list_mtx held. |
2917 | */ | 2923 | */ |
2918 | static void ext4_remove_li_request(struct ext4_li_request *elr) | 2924 | static void ext4_remove_li_request(struct ext4_li_request *elr) |
2919 | { | 2925 | { |
2920 | struct ext4_sb_info *sbi; | 2926 | struct ext4_sb_info *sbi; |
2921 | 2927 | ||
2922 | if (!elr) | 2928 | if (!elr) |
2923 | return; | 2929 | return; |
2924 | 2930 | ||
2925 | sbi = elr->lr_sbi; | 2931 | sbi = elr->lr_sbi; |
2926 | 2932 | ||
2927 | list_del(&elr->lr_request); | 2933 | list_del(&elr->lr_request); |
2928 | sbi->s_li_request = NULL; | 2934 | sbi->s_li_request = NULL; |
2929 | kfree(elr); | 2935 | kfree(elr); |
2930 | } | 2936 | } |
2931 | 2937 | ||
2932 | static void ext4_unregister_li_request(struct super_block *sb) | 2938 | static void ext4_unregister_li_request(struct super_block *sb) |
2933 | { | 2939 | { |
2934 | mutex_lock(&ext4_li_mtx); | 2940 | mutex_lock(&ext4_li_mtx); |
2935 | if (!ext4_li_info) { | 2941 | if (!ext4_li_info) { |
2936 | mutex_unlock(&ext4_li_mtx); | 2942 | mutex_unlock(&ext4_li_mtx); |
2937 | return; | 2943 | return; |
2938 | } | 2944 | } |
2939 | 2945 | ||
2940 | mutex_lock(&ext4_li_info->li_list_mtx); | 2946 | mutex_lock(&ext4_li_info->li_list_mtx); |
2941 | ext4_remove_li_request(EXT4_SB(sb)->s_li_request); | 2947 | ext4_remove_li_request(EXT4_SB(sb)->s_li_request); |
2942 | mutex_unlock(&ext4_li_info->li_list_mtx); | 2948 | mutex_unlock(&ext4_li_info->li_list_mtx); |
2943 | mutex_unlock(&ext4_li_mtx); | 2949 | mutex_unlock(&ext4_li_mtx); |
2944 | } | 2950 | } |
2945 | 2951 | ||
2946 | static struct task_struct *ext4_lazyinit_task; | 2952 | static struct task_struct *ext4_lazyinit_task; |
2947 | 2953 | ||
2948 | /* | 2954 | /* |
2949 | * This is the function where the ext4lazyinit thread lives. It walks | 2955 | * This is the function where the ext4lazyinit thread lives. It walks |
2950 | * through the request list searching for the next scheduled filesystem. | 2956 | * through the request list searching for the next scheduled filesystem. |
2951 | * When such a fs is found, it runs the lazy initialization request | 2957 | * When such a fs is found, it runs the lazy initialization request |
2952 | * (ext4_run_li_request) and keeps track of the time spent in this | 2958 | * (ext4_run_li_request) and keeps track of the time spent in this |
2953 | * function. Based on that time we compute the next schedule time of | 2959 | * function. Based on that time we compute the next schedule time of |
2954 | * the request. When walking through the list is complete, the thread | 2960 | * the request. When walking through the list is complete, the thread |
2955 | * computes the next wakeup time and puts itself to sleep. | 2961 | * computes the next wakeup time and puts itself to sleep. |
2956 | */ | 2962 | */ |
2957 | static int ext4_lazyinit_thread(void *arg) | 2963 | static int ext4_lazyinit_thread(void *arg) |
2958 | { | 2964 | { |
2959 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; | 2965 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; |
2960 | struct list_head *pos, *n; | 2966 | struct list_head *pos, *n; |
2961 | struct ext4_li_request *elr; | 2967 | struct ext4_li_request *elr; |
2962 | unsigned long next_wakeup, cur; | 2968 | unsigned long next_wakeup, cur; |
2963 | 2969 | ||
2964 | BUG_ON(NULL == eli); | 2970 | BUG_ON(NULL == eli); |
2965 | 2971 | ||
2966 | cont_thread: | 2972 | cont_thread: |
2967 | while (true) { | 2973 | while (true) { |
2968 | next_wakeup = MAX_JIFFY_OFFSET; | 2974 | next_wakeup = MAX_JIFFY_OFFSET; |
2969 | 2975 | ||
2970 | mutex_lock(&eli->li_list_mtx); | 2976 | mutex_lock(&eli->li_list_mtx); |
2971 | if (list_empty(&eli->li_request_list)) { | 2977 | if (list_empty(&eli->li_request_list)) { |
2972 | mutex_unlock(&eli->li_list_mtx); | 2978 | mutex_unlock(&eli->li_list_mtx); |
2973 | goto exit_thread; | 2979 | goto exit_thread; |
2974 | } | 2980 | } |
2975 | 2981 | ||
2976 | list_for_each_safe(pos, n, &eli->li_request_list) { | 2982 | list_for_each_safe(pos, n, &eli->li_request_list) { |
2977 | elr = list_entry(pos, struct ext4_li_request, | 2983 | elr = list_entry(pos, struct ext4_li_request, |
2978 | lr_request); | 2984 | lr_request); |
2979 | 2985 | ||
2980 | if (time_after_eq(jiffies, elr->lr_next_sched)) { | 2986 | if (time_after_eq(jiffies, elr->lr_next_sched)) { |
2981 | if (ext4_run_li_request(elr) != 0) { | 2987 | if (ext4_run_li_request(elr) != 0) { |
2982 | /* error, remove the lazy_init job */ | 2988 | /* error, remove the lazy_init job */ |
2983 | ext4_remove_li_request(elr); | 2989 | ext4_remove_li_request(elr); |
2984 | continue; | 2990 | continue; |
2985 | } | 2991 | } |
2986 | } | 2992 | } |
2987 | 2993 | ||
2988 | if (time_before(elr->lr_next_sched, next_wakeup)) | 2994 | if (time_before(elr->lr_next_sched, next_wakeup)) |
2989 | next_wakeup = elr->lr_next_sched; | 2995 | next_wakeup = elr->lr_next_sched; |
2990 | } | 2996 | } |
2991 | mutex_unlock(&eli->li_list_mtx); | 2997 | mutex_unlock(&eli->li_list_mtx); |
2992 | 2998 | ||
2993 | try_to_freeze(); | 2999 | try_to_freeze(); |
2994 | 3000 | ||
2995 | cur = jiffies; | 3001 | cur = jiffies; |
2996 | if ((time_after_eq(cur, next_wakeup)) || | 3002 | if ((time_after_eq(cur, next_wakeup)) || |
2997 | (MAX_JIFFY_OFFSET == next_wakeup)) { | 3003 | (MAX_JIFFY_OFFSET == next_wakeup)) { |
2998 | cond_resched(); | 3004 | cond_resched(); |
2999 | continue; | 3005 | continue; |
3000 | } | 3006 | } |
3001 | 3007 | ||
3002 | schedule_timeout_interruptible(next_wakeup - cur); | 3008 | schedule_timeout_interruptible(next_wakeup - cur); |
3003 | 3009 | ||
3004 | if (kthread_should_stop()) { | 3010 | if (kthread_should_stop()) { |
3005 | ext4_clear_request_list(); | 3011 | ext4_clear_request_list(); |
3006 | goto exit_thread; | 3012 | goto exit_thread; |
3007 | } | 3013 | } |
3008 | } | 3014 | } |
3009 | 3015 | ||
3010 | exit_thread: | 3016 | exit_thread: |
3011 | /* | 3017 | /* |
3012 | * It looks like the request list is empty, but we need | 3018 | * It looks like the request list is empty, but we need |
3013 | * to check it under the li_list_mtx lock, to prevent any | 3019 | * to check it under the li_list_mtx lock, to prevent any |
3014 | * additions into it, and of course we should lock ext4_li_mtx | 3020 | * additions into it, and of course we should lock ext4_li_mtx |
3015 | * to atomically free the list and ext4_li_info, because at | 3021 | * to atomically free the list and ext4_li_info, because at |
3016 | * this point another ext4 filesystem could be registering | 3022 | * this point another ext4 filesystem could be registering |
3017 | * a new one. | 3023 | * a new one. |
3018 | */ | 3024 | */ |
3019 | mutex_lock(&ext4_li_mtx); | 3025 | mutex_lock(&ext4_li_mtx); |
3020 | mutex_lock(&eli->li_list_mtx); | 3026 | mutex_lock(&eli->li_list_mtx); |
3021 | if (!list_empty(&eli->li_request_list)) { | 3027 | if (!list_empty(&eli->li_request_list)) { |
3022 | mutex_unlock(&eli->li_list_mtx); | 3028 | mutex_unlock(&eli->li_list_mtx); |
3023 | mutex_unlock(&ext4_li_mtx); | 3029 | mutex_unlock(&ext4_li_mtx); |
3024 | goto cont_thread; | 3030 | goto cont_thread; |
3025 | } | 3031 | } |
3026 | mutex_unlock(&eli->li_list_mtx); | 3032 | mutex_unlock(&eli->li_list_mtx); |
3027 | kfree(ext4_li_info); | 3033 | kfree(ext4_li_info); |
3028 | ext4_li_info = NULL; | 3034 | ext4_li_info = NULL; |
3029 | mutex_unlock(&ext4_li_mtx); | 3035 | mutex_unlock(&ext4_li_mtx); |
3030 | 3036 | ||
3031 | return 0; | 3037 | return 0; |
3032 | } | 3038 | } |
3033 | 3039 | ||
3034 | static void ext4_clear_request_list(void) | 3040 | static void ext4_clear_request_list(void) |
3035 | { | 3041 | { |
3036 | struct list_head *pos, *n; | 3042 | struct list_head *pos, *n; |
3037 | struct ext4_li_request *elr; | 3043 | struct ext4_li_request *elr; |
3038 | 3044 | ||
3039 | mutex_lock(&ext4_li_info->li_list_mtx); | 3045 | mutex_lock(&ext4_li_info->li_list_mtx); |
3040 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { | 3046 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { |
3041 | elr = list_entry(pos, struct ext4_li_request, | 3047 | elr = list_entry(pos, struct ext4_li_request, |
3042 | lr_request); | 3048 | lr_request); |
3043 | ext4_remove_li_request(elr); | 3049 | ext4_remove_li_request(elr); |
3044 | } | 3050 | } |
3045 | mutex_unlock(&ext4_li_info->li_list_mtx); | 3051 | mutex_unlock(&ext4_li_info->li_list_mtx); |
3046 | } | 3052 | } |
3047 | 3053 | ||
3048 | static int ext4_run_lazyinit_thread(void) | 3054 | static int ext4_run_lazyinit_thread(void) |
3049 | { | 3055 | { |
3050 | ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, | 3056 | ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, |
3051 | ext4_li_info, "ext4lazyinit"); | 3057 | ext4_li_info, "ext4lazyinit"); |
3052 | if (IS_ERR(ext4_lazyinit_task)) { | 3058 | if (IS_ERR(ext4_lazyinit_task)) { |
3053 | int err = PTR_ERR(ext4_lazyinit_task); | 3059 | int err = PTR_ERR(ext4_lazyinit_task); |
3054 | ext4_clear_request_list(); | 3060 | ext4_clear_request_list(); |
3055 | kfree(ext4_li_info); | 3061 | kfree(ext4_li_info); |
3056 | ext4_li_info = NULL; | 3062 | ext4_li_info = NULL; |
3057 | printk(KERN_CRIT "EXT4-fs: error %d creating inode table " | 3063 | printk(KERN_CRIT "EXT4-fs: error %d creating inode table " |
3058 | "initialization thread\n", | 3064 | "initialization thread\n", |
3059 | err); | 3065 | err); |
3060 | return err; | 3066 | return err; |
3061 | } | 3067 | } |
3062 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; | 3068 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; |
3063 | return 0; | 3069 | return 0; |
3064 | } | 3070 | } |
3065 | 3071 | ||
3066 | /* | 3072 | /* |
3067 | * Check whether it makes sense to run the itable init thread or not. | 3073 | * Check whether it makes sense to run the itable init thread or not. |
3068 | * If there is at least one uninitialized inode table, return the | 3074 | * If there is at least one uninitialized inode table, return the |
3069 | * corresponding group number; otherwise the loop goes through all | 3075 | * corresponding group number; otherwise the loop goes through all |
3070 | * groups and returns the total number of groups. | 3076 | * groups and returns the total number of groups. |
3071 | */ | 3077 | */ |
3072 | static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) | 3078 | static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) |
3073 | { | 3079 | { |
3074 | ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; | 3080 | ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; |
3075 | struct ext4_group_desc *gdp = NULL; | 3081 | struct ext4_group_desc *gdp = NULL; |
3076 | 3082 | ||
3077 | for (group = 0; group < ngroups; group++) { | 3083 | for (group = 0; group < ngroups; group++) { |
3078 | gdp = ext4_get_group_desc(sb, group, NULL); | 3084 | gdp = ext4_get_group_desc(sb, group, NULL); |
3079 | if (!gdp) | 3085 | if (!gdp) |
3080 | continue; | 3086 | continue; |
3081 | 3087 | ||
3082 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) | 3088 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) |
3083 | break; | 3089 | break; |
3084 | } | 3090 | } |
3085 | 3091 | ||
3086 | return group; | 3092 | return group; |
3087 | } | 3093 | } |
3088 | 3094 | ||
3089 | static int ext4_li_info_new(void) | 3095 | static int ext4_li_info_new(void) |
3090 | { | 3096 | { |
3091 | struct ext4_lazy_init *eli = NULL; | 3097 | struct ext4_lazy_init *eli = NULL; |
3092 | 3098 | ||
3093 | eli = kzalloc(sizeof(*eli), GFP_KERNEL); | 3099 | eli = kzalloc(sizeof(*eli), GFP_KERNEL); |
3094 | if (!eli) | 3100 | if (!eli) |
3095 | return -ENOMEM; | 3101 | return -ENOMEM; |
3096 | 3102 | ||
3097 | INIT_LIST_HEAD(&eli->li_request_list); | 3103 | INIT_LIST_HEAD(&eli->li_request_list); |
3098 | mutex_init(&eli->li_list_mtx); | 3104 | mutex_init(&eli->li_list_mtx); |
3099 | 3105 | ||
3100 | eli->li_state |= EXT4_LAZYINIT_QUIT; | 3106 | eli->li_state |= EXT4_LAZYINIT_QUIT; |
3101 | 3107 | ||
3102 | ext4_li_info = eli; | 3108 | ext4_li_info = eli; |
3103 | 3109 | ||
3104 | return 0; | 3110 | return 0; |
3105 | } | 3111 | } |
3106 | 3112 | ||
3107 | static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, | 3113 | static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, |
3108 | ext4_group_t start) | 3114 | ext4_group_t start) |
3109 | { | 3115 | { |
3110 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3116 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3111 | struct ext4_li_request *elr; | 3117 | struct ext4_li_request *elr; |
3112 | 3118 | ||
3113 | elr = kzalloc(sizeof(*elr), GFP_KERNEL); | 3119 | elr = kzalloc(sizeof(*elr), GFP_KERNEL); |
3114 | if (!elr) | 3120 | if (!elr) |
3115 | return NULL; | 3121 | return NULL; |
3116 | 3122 | ||
3117 | elr->lr_super = sb; | 3123 | elr->lr_super = sb; |
3118 | elr->lr_sbi = sbi; | 3124 | elr->lr_sbi = sbi; |
3119 | elr->lr_next_group = start; | 3125 | elr->lr_next_group = start; |
3120 | 3126 | ||
3121 | /* | 3127 | /* |
3122 | * Randomize first schedule time of the request to | 3128 | * Randomize first schedule time of the request to |
3123 | * spread the inode table initialization requests | 3129 | * spread the inode table initialization requests |
3124 | * better. | 3130 | * better. |
3125 | */ | 3131 | */ |
3126 | elr->lr_next_sched = jiffies + (prandom_u32() % | 3132 | elr->lr_next_sched = jiffies + (prandom_u32() % |
3127 | (EXT4_DEF_LI_MAX_START_DELAY * HZ)); | 3133 | (EXT4_DEF_LI_MAX_START_DELAY * HZ)); |
3128 | return elr; | 3134 | return elr; |
3129 | } | 3135 | } |
3130 | 3136 | ||
3131 | int ext4_register_li_request(struct super_block *sb, | 3137 | int ext4_register_li_request(struct super_block *sb, |
3132 | ext4_group_t first_not_zeroed) | 3138 | ext4_group_t first_not_zeroed) |
3133 | { | 3139 | { |
3134 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3140 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3135 | struct ext4_li_request *elr = NULL; | 3141 | struct ext4_li_request *elr = NULL; |
3136 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 3142 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
3137 | int ret = 0; | 3143 | int ret = 0; |
3138 | 3144 | ||
3139 | mutex_lock(&ext4_li_mtx); | 3145 | mutex_lock(&ext4_li_mtx); |
3140 | if (sbi->s_li_request != NULL) { | 3146 | if (sbi->s_li_request != NULL) { |
3141 | /* | 3147 | /* |
3142 | * Reset timeout so it can be computed again, because | 3148 | * Reset timeout so it can be computed again, because |
3143 | * s_li_wait_mult might have changed. | 3149 | * s_li_wait_mult might have changed. |
3144 | */ | 3150 | */ |
3145 | sbi->s_li_request->lr_timeout = 0; | 3151 | sbi->s_li_request->lr_timeout = 0; |
3146 | goto out; | 3152 | goto out; |
3147 | } | 3153 | } |
3148 | 3154 | ||
3149 | if (first_not_zeroed == ngroups || | 3155 | if (first_not_zeroed == ngroups || |
3150 | (sb->s_flags & MS_RDONLY) || | 3156 | (sb->s_flags & MS_RDONLY) || |
3151 | !test_opt(sb, INIT_INODE_TABLE)) | 3157 | !test_opt(sb, INIT_INODE_TABLE)) |
3152 | goto out; | 3158 | goto out; |
3153 | 3159 | ||
3154 | elr = ext4_li_request_new(sb, first_not_zeroed); | 3160 | elr = ext4_li_request_new(sb, first_not_zeroed); |
3155 | if (!elr) { | 3161 | if (!elr) { |
3156 | ret = -ENOMEM; | 3162 | ret = -ENOMEM; |
3157 | goto out; | 3163 | goto out; |
3158 | } | 3164 | } |
3159 | 3165 | ||
3160 | if (NULL == ext4_li_info) { | 3166 | if (NULL == ext4_li_info) { |
3161 | ret = ext4_li_info_new(); | 3167 | ret = ext4_li_info_new(); |
3162 | if (ret) | 3168 | if (ret) |
3163 | goto out; | 3169 | goto out; |
3164 | } | 3170 | } |
3165 | 3171 | ||
3166 | mutex_lock(&ext4_li_info->li_list_mtx); | 3172 | mutex_lock(&ext4_li_info->li_list_mtx); |
3167 | list_add(&elr->lr_request, &ext4_li_info->li_request_list); | 3173 | list_add(&elr->lr_request, &ext4_li_info->li_request_list); |
3168 | mutex_unlock(&ext4_li_info->li_list_mtx); | 3174 | mutex_unlock(&ext4_li_info->li_list_mtx); |
3169 | 3175 | ||
3170 | sbi->s_li_request = elr; | 3176 | sbi->s_li_request = elr; |
3171 | /* | 3177 | /* |
3172 | * set elr to NULL here since it has been inserted into | 3178 | * set elr to NULL here since it has been inserted into |
3173 | * the request_list, and its removal and freeing are | 3179 | * the request_list, and its removal and freeing are |
3174 | * handled by ext4_clear_request_list from now on. | 3180 | * handled by ext4_clear_request_list from now on. |
3175 | */ | 3181 | */ |
3176 | elr = NULL; | 3182 | elr = NULL; |
3177 | 3183 | ||
3178 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { | 3184 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { |
3179 | ret = ext4_run_lazyinit_thread(); | 3185 | ret = ext4_run_lazyinit_thread(); |
3180 | if (ret) | 3186 | if (ret) |
3181 | goto out; | 3187 | goto out; |
3182 | } | 3188 | } |
3183 | out: | 3189 | out: |
3184 | mutex_unlock(&ext4_li_mtx); | 3190 | mutex_unlock(&ext4_li_mtx); |
3185 | if (ret) | 3191 | if (ret) |
3186 | kfree(elr); | 3192 | kfree(elr); |
3187 | return ret; | 3193 | return ret; |
3188 | } | 3194 | } |
3189 | 3195 | ||
3190 | /* | 3196 | /* |
3191 | * We do not need to lock anything since this is called on | 3197 | * We do not need to lock anything since this is called on |
3192 | * module unload. | 3198 | * module unload. |
3193 | */ | 3199 | */ |
3194 | static void ext4_destroy_lazyinit_thread(void) | 3200 | static void ext4_destroy_lazyinit_thread(void) |
3195 | { | 3201 | { |
3196 | /* | 3202 | /* |
3197 | * If the thread exited earlier, | 3203 | * If the thread exited earlier, |
3198 | * there's nothing to be done. | 3204 | * there's nothing to be done. |
3199 | */ | 3205 | */ |
3200 | if (!ext4_li_info || !ext4_lazyinit_task) | 3206 | if (!ext4_li_info || !ext4_lazyinit_task) |
3201 | return; | 3207 | return; |
3202 | 3208 | ||
3203 | kthread_stop(ext4_lazyinit_task); | 3209 | kthread_stop(ext4_lazyinit_task); |
3204 | } | 3210 | } |
3205 | 3211 | ||
3206 | static int set_journal_csum_feature_set(struct super_block *sb) | 3212 | static int set_journal_csum_feature_set(struct super_block *sb) |
3207 | { | 3213 | { |
3208 | int ret = 1; | 3214 | int ret = 1; |
3209 | int compat, incompat; | 3215 | int compat, incompat; |
3210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3216 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3211 | 3217 | ||
3212 | if (ext4_has_metadata_csum(sb)) { | 3218 | if (ext4_has_metadata_csum(sb)) { |
3213 | /* journal checksum v3 */ | 3219 | /* journal checksum v3 */ |
3214 | compat = 0; | 3220 | compat = 0; |
3215 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; | 3221 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; |
3216 | } else { | 3222 | } else { |
3217 | /* journal checksum v1 */ | 3223 | /* journal checksum v1 */ |
3218 | compat = JBD2_FEATURE_COMPAT_CHECKSUM; | 3224 | compat = JBD2_FEATURE_COMPAT_CHECKSUM; |
3219 | incompat = 0; | 3225 | incompat = 0; |
3220 | } | 3226 | } |
3221 | 3227 | ||
3222 | jbd2_journal_clear_features(sbi->s_journal, | 3228 | jbd2_journal_clear_features(sbi->s_journal, |
3223 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 3229 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, |
3224 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | | 3230 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | |
3225 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | 3231 | JBD2_FEATURE_INCOMPAT_CSUM_V2); |
3226 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 3232 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { |
3227 | ret = jbd2_journal_set_features(sbi->s_journal, | 3233 | ret = jbd2_journal_set_features(sbi->s_journal, |
3228 | compat, 0, | 3234 | compat, 0, |
3229 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | | 3235 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | |
3230 | incompat); | 3236 | incompat); |
3231 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | 3237 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { |
3232 | ret = jbd2_journal_set_features(sbi->s_journal, | 3238 | ret = jbd2_journal_set_features(sbi->s_journal, |
3233 | compat, 0, | 3239 | compat, 0, |
3234 | incompat); | 3240 | incompat); |
3235 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 3241 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
3236 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 3242 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
3237 | } else { | 3243 | } else { |
3238 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | 3244 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, |
3239 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 3245 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); |
3240 | } | 3246 | } |
3241 | 3247 | ||
3242 | return ret; | 3248 | return ret; |
3243 | } | 3249 | } |
3244 | 3250 | ||
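A note for readers following the checksum-feature selection above: here is a minimal userspace sketch of the same decision table. The bit values and flag names are illustrative stand-ins, not the real JBD2 constants:

    #include <stdio.h>

    #define COMPAT_CHECKSUM   0x1   /* stand-in: journal checksum v1 */
    #define INCOMPAT_CSUM_V3  0x2   /* stand-in: journal checksum v3 */
    #define INCOMPAT_ASYNC    0x4   /* stand-in: async commit        */

    static void pick_features(int metadata_csum, int async_commit,
                              int journal_checksum,
                              unsigned *compat, unsigned *incompat)
    {
            /* v3 when the fs carries metadata checksums, v1 otherwise */
            *compat   = metadata_csum ? 0 : COMPAT_CHECKSUM;
            *incompat = metadata_csum ? INCOMPAT_CSUM_V3 : 0;
            if (async_commit)
                    *incompat |= INCOMPAT_ASYNC;
            else if (!journal_checksum)
                    *compat = *incompat = 0;   /* nothing requested */
    }

    int main(void)
    {
            unsigned c, i;
            pick_features(1, 0, 1, &c, &i);
            printf("compat=%#x incompat=%#x\n", c, i);  /* 0 and 0x2 */
            return 0;
    }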
3245 | /* | 3251 | /* |
3246 | * Note: calculating the overhead so we can be compatible with | 3252 | * Note: calculating the overhead so we can be compatible with |
3247 | * historical BSD practice is quite difficult in the face of | 3253 | * historical BSD practice is quite difficult in the face of |
3248 | * clusters/bigalloc. This is because multiple metadata blocks from | 3254 | * clusters/bigalloc. This is because multiple metadata blocks from |
3249 | * different block groups can end up in the same allocation cluster. | 3255 | * different block groups can end up in the same allocation cluster. |
3250 | * Calculating the exact overhead in the face of clustered allocation | 3256 | * Calculating the exact overhead in the face of clustered allocation |
3251 | * requires either O(all block bitmaps) in memory or O(number of block | 3257 | * requires either O(all block bitmaps) in memory or O(number of block |
3252 | * groups**2) in time. We will still calculate the overhead for | 3258 | * groups**2) in time. We will still calculate the overhead for |
3253 | * older file systems --- and if we come across a bigalloc file | 3259 | * older file systems --- and if we come across a bigalloc file |
3254 | * system with zero in s_overhead_clusters the estimate will be close to | 3260 | * system with zero in s_overhead_clusters the estimate will be close to |
3255 | * correct especially for very large cluster sizes --- but for newer | 3261 | * correct especially for very large cluster sizes --- but for newer |
3256 | * file systems, it's better to calculate this figure once at mkfs | 3262 | * file systems, it's better to calculate this figure once at mkfs |
3257 | * time, and store it in the superblock. If the superblock value is | 3263 | * time, and store it in the superblock. If the superblock value is |
3258 | * present (even for non-bigalloc file systems), we will use it. | 3264 | * present (even for non-bigalloc file systems), we will use it. |
3259 | */ | 3265 | */ |
3260 | static int count_overhead(struct super_block *sb, ext4_group_t grp, | 3266 | static int count_overhead(struct super_block *sb, ext4_group_t grp, |
3261 | char *buf) | 3267 | char *buf) |
3262 | { | 3268 | { |
3263 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3269 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3264 | struct ext4_group_desc *gdp; | 3270 | struct ext4_group_desc *gdp; |
3265 | ext4_fsblk_t first_block, last_block, b; | 3271 | ext4_fsblk_t first_block, last_block, b; |
3266 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 3272 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3267 | int s, j, count = 0; | 3273 | int s, j, count = 0; |
3268 | 3274 | ||
3269 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) | 3275 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) |
3270 | return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + | 3276 | return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + |
3271 | sbi->s_itb_per_group + 2); | 3277 | sbi->s_itb_per_group + 2); |
3272 | 3278 | ||
3273 | first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + | 3279 | first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + |
3274 | (grp * EXT4_BLOCKS_PER_GROUP(sb)); | 3280 | (grp * EXT4_BLOCKS_PER_GROUP(sb)); |
3275 | last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; | 3281 | last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; |
3276 | for (i = 0; i < ngroups; i++) { | 3282 | for (i = 0; i < ngroups; i++) { |
3277 | gdp = ext4_get_group_desc(sb, i, NULL); | 3283 | gdp = ext4_get_group_desc(sb, i, NULL); |
3278 | b = ext4_block_bitmap(sb, gdp); | 3284 | b = ext4_block_bitmap(sb, gdp); |
3279 | if (b >= first_block && b <= last_block) { | 3285 | if (b >= first_block && b <= last_block) { |
3280 | ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); | 3286 | ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); |
3281 | count++; | 3287 | count++; |
3282 | } | 3288 | } |
3283 | b = ext4_inode_bitmap(sb, gdp); | 3289 | b = ext4_inode_bitmap(sb, gdp); |
3284 | if (b >= first_block && b <= last_block) { | 3290 | if (b >= first_block && b <= last_block) { |
3285 | ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); | 3291 | ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); |
3286 | count++; | 3292 | count++; |
3287 | } | 3293 | } |
3288 | b = ext4_inode_table(sb, gdp); | 3294 | b = ext4_inode_table(sb, gdp); |
3289 | if (b >= first_block && b + sbi->s_itb_per_group <= last_block) | 3295 | if (b >= first_block && b + sbi->s_itb_per_group <= last_block) |
3290 | for (j = 0; j < sbi->s_itb_per_group; j++, b++) { | 3296 | for (j = 0; j < sbi->s_itb_per_group; j++, b++) { |
3291 | int c = EXT4_B2C(sbi, b - first_block); | 3297 | int c = EXT4_B2C(sbi, b - first_block); |
3292 | ext4_set_bit(c, buf); | 3298 | ext4_set_bit(c, buf); |
3293 | count++; | 3299 | count++; |
3294 | } | 3300 | } |
3295 | if (i != grp) | 3301 | if (i != grp) |
3296 | continue; | 3302 | continue; |
3297 | s = 0; | 3303 | s = 0; |
3298 | if (ext4_bg_has_super(sb, grp)) { | 3304 | if (ext4_bg_has_super(sb, grp)) { |
3299 | ext4_set_bit(s++, buf); | 3305 | ext4_set_bit(s++, buf); |
3300 | count++; | 3306 | count++; |
3301 | } | 3307 | } |
3302 | for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { | 3308 | for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { |
3303 | ext4_set_bit(EXT4_B2C(sbi, s++), buf); | 3309 | ext4_set_bit(EXT4_B2C(sbi, s++), buf); |
3304 | count++; | 3310 | count++; |
3305 | } | 3311 | } |
3306 | } | 3312 | } |
3307 | if (!count) | 3313 | if (!count) |
3308 | return 0; | 3314 | return 0; |
3309 | return EXT4_CLUSTERS_PER_GROUP(sb) - | 3315 | return EXT4_CLUSTERS_PER_GROUP(sb) - |
3310 | ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); | 3316 | ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); |
3311 | } | 3317 | } |
3312 | 3318 | ||
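Everything in the loop above hinges on EXT4_B2C(), which converts a group-relative block number into a cluster index so that metadata blocks sharing a cluster are counted only once. A self-contained sketch, assuming an example geometry of 4KiB blocks and 64KiB clusters (cluster_bits = 4):

    #include <stdio.h>

    static const unsigned cluster_bits = 4;  /* assumed: 64KiB clusters */

    /* Mirrors EXT4_B2C(sbi, blk): shift out the block-to-cluster ratio. */
    static unsigned long b2c(unsigned long blk)
    {
            return blk >> cluster_bits;
    }

    int main(void)
    {
            /* Blocks 0..15 all map to cluster 0: a block bitmap at block 5
             * and an inode bitmap at block 9 cost one overhead cluster,
             * which is why the code sets bits in a cluster bitmap rather
             * than simply summing block counts. */
            printf("b2c(5)=%lu b2c(9)=%lu b2c(16)=%lu\n",
                   b2c(5), b2c(9), b2c(16));  /* 0, 0, 1 */
            return 0;
    }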
3313 | /* | 3319 | /* |
3314 | * Compute the overhead and stash it in sbi->s_overhead | 3320 | * Compute the overhead and stash it in sbi->s_overhead |
3315 | */ | 3321 | */ |
3316 | int ext4_calculate_overhead(struct super_block *sb) | 3322 | int ext4_calculate_overhead(struct super_block *sb) |
3317 | { | 3323 | { |
3318 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3324 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3319 | struct ext4_super_block *es = sbi->s_es; | 3325 | struct ext4_super_block *es = sbi->s_es; |
3320 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); | 3326 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
3321 | ext4_fsblk_t overhead = 0; | 3327 | ext4_fsblk_t overhead = 0; |
3322 | char *buf = (char *) get_zeroed_page(GFP_NOFS); | 3328 | char *buf = (char *) get_zeroed_page(GFP_NOFS); |
3323 | 3329 | ||
3324 | if (!buf) | 3330 | if (!buf) |
3325 | return -ENOMEM; | 3331 | return -ENOMEM; |
3326 | 3332 | ||
3327 | /* | 3333 | /* |
3328 | * Compute the overhead (FS structures). This is constant | 3334 | * Compute the overhead (FS structures). This is constant |
3329 | * for a given filesystem unless the number of block groups | 3335 | * for a given filesystem unless the number of block groups |
3330 | * changes, so we cache the previous value until it does. | 3336 | * changes, so we cache the previous value until it does. |
3331 | */ | 3337 | */ |
3332 | 3338 | ||
3333 | /* | 3339 | /* |
3334 | * All of the blocks before first_data_block are overhead | 3340 | * All of the blocks before first_data_block are overhead |
3335 | */ | 3341 | */ |
3336 | overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); | 3342 | overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); |
3337 | 3343 | ||
3338 | /* | 3344 | /* |
3339 | * Add the overhead found in each block group | 3345 | * Add the overhead found in each block group |
3340 | */ | 3346 | */ |
3341 | for (i = 0; i < ngroups; i++) { | 3347 | for (i = 0; i < ngroups; i++) { |
3342 | int blks; | 3348 | int blks; |
3343 | 3349 | ||
3344 | blks = count_overhead(sb, i, buf); | 3350 | blks = count_overhead(sb, i, buf); |
3345 | overhead += blks; | 3351 | overhead += blks; |
3346 | if (blks) | 3352 | if (blks) |
3347 | memset(buf, 0, PAGE_SIZE); | 3353 | memset(buf, 0, PAGE_SIZE); |
3348 | cond_resched(); | 3354 | cond_resched(); |
3349 | } | 3355 | } |
3350 | /* Add the internal journal blocks as well */ | 3356 | /* Add the internal journal blocks as well */ |
3351 | if (sbi->s_journal && !sbi->journal_bdev) | 3357 | if (sbi->s_journal && !sbi->journal_bdev) |
3352 | overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); | 3358 | overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); |
3353 | 3359 | ||
3354 | sbi->s_overhead = overhead; | 3360 | sbi->s_overhead = overhead; |
3355 | smp_wmb(); | 3361 | smp_wmb(); |
3356 | free_page((unsigned long) buf); | 3362 | free_page((unsigned long) buf); |
3357 | return 0; | 3363 | return 0; |
3358 | } | 3364 | } |
3359 | 3365 | ||
3360 | 3366 | ||
3361 | static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) | 3367 | static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) |
3362 | { | 3368 | { |
3363 | ext4_fsblk_t resv_clusters; | 3369 | ext4_fsblk_t resv_clusters; |
3364 | 3370 | ||
3365 | /* | 3371 | /* |
3366 | * There's no need to reserve anything when we aren't using extents. | 3372 | * There's no need to reserve anything when we aren't using extents. |
3367 | * The space estimates are exact, there are no unwritten extents, | 3373 | * The space estimates are exact, there are no unwritten extents, |
3368 | * hole punching doesn't need new metadata... This is needed especially | 3374 | * hole punching doesn't need new metadata... This is needed especially |
3369 | * to keep ext2/3 backward compatibility. | 3375 | * to keep ext2/3 backward compatibility. |
3370 | */ | 3376 | */ |
3371 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) | 3377 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) |
3372 | return 0; | 3378 | return 0; |
3373 | /* | 3379 | /* |
3374 | * By default we reserve 2% or 4096 clusters, whichever is smaller. | 3380 | * By default we reserve 2% or 4096 clusters, whichever is smaller. |
3375 | * This should cover the situations where we cannot afford to run | 3381 | * This should cover the situations where we cannot afford to run |
3376 | * out of space, such as punching a hole or converting | 3382 | * out of space, such as punching a hole or converting |
3377 | * unwritten extents in the delalloc path. In most cases such an | 3383 | * unwritten extents in the delalloc path. In most cases such an |
3378 | * allocation requires only one or two blocks; higher numbers are | 3384 | * allocation requires only one or two blocks; higher numbers are |
3379 | * very rare. | 3385 | * very rare. |
3380 | */ | 3386 | */ |
3381 | resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> | 3387 | resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> |
3382 | EXT4_SB(sb)->s_cluster_bits; | 3388 | EXT4_SB(sb)->s_cluster_bits; |
3383 | 3389 | ||
3384 | do_div(resv_clusters, 50); | 3390 | do_div(resv_clusters, 50); |
3385 | resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); | 3391 | resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); |
3386 | 3392 | ||
3387 | return resv_clusters; | 3393 | return resv_clusters; |
3388 | } | 3394 | } |
3389 | 3395 | ||
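Worked numbers for the reservation policy above, as a self-contained sketch; the cluster counts are made-up inputs:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors ext4_calculate_resv_clusters(): 2% of all clusters,
     * capped at 4096. */
    static uint64_t resv_clusters(uint64_t clusters)
    {
            uint64_t resv = clusters / 50;     /* the do_div(..., 50)  */
            return resv < 4096 ? resv : 4096;  /* the min_t(..., 4096) */
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)resv_clusters(100000));
            /* -> 2000: on a small fs the 2% figure wins */
            printf("%llu\n", (unsigned long long)resv_clusters(10000000));
            /* -> 4096: on a large fs the cap wins */
            return 0;
    }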
3390 | 3396 | ||
3391 | static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) | 3397 | static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) |
3392 | { | 3398 | { |
3393 | ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> | 3399 | ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> |
3394 | sbi->s_cluster_bits; | 3400 | sbi->s_cluster_bits; |
3395 | 3401 | ||
3396 | if (count >= clusters) | 3402 | if (count >= clusters) |
3397 | return -EINVAL; | 3403 | return -EINVAL; |
3398 | 3404 | ||
3399 | atomic64_set(&sbi->s_resv_clusters, count); | 3405 | atomic64_set(&sbi->s_resv_clusters, count); |
3400 | return 0; | 3406 | return 0; |
3401 | } | 3407 | } |
3402 | 3408 | ||
3403 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 3409 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
3404 | { | 3410 | { |
3405 | char *orig_data = kstrdup(data, GFP_KERNEL); | 3411 | char *orig_data = kstrdup(data, GFP_KERNEL); |
3406 | struct buffer_head *bh; | 3412 | struct buffer_head *bh; |
3407 | struct ext4_super_block *es = NULL; | 3413 | struct ext4_super_block *es = NULL; |
3408 | struct ext4_sb_info *sbi; | 3414 | struct ext4_sb_info *sbi; |
3409 | ext4_fsblk_t block; | 3415 | ext4_fsblk_t block; |
3410 | ext4_fsblk_t sb_block = get_sb_block(&data); | 3416 | ext4_fsblk_t sb_block = get_sb_block(&data); |
3411 | ext4_fsblk_t logical_sb_block; | 3417 | ext4_fsblk_t logical_sb_block; |
3412 | unsigned long offset = 0; | 3418 | unsigned long offset = 0; |
3413 | unsigned long journal_devnum = 0; | 3419 | unsigned long journal_devnum = 0; |
3414 | unsigned long def_mount_opts; | 3420 | unsigned long def_mount_opts; |
3415 | struct inode *root; | 3421 | struct inode *root; |
3416 | char *cp; | 3422 | char *cp; |
3417 | const char *descr; | 3423 | const char *descr; |
3418 | int ret = -ENOMEM; | 3424 | int ret = -ENOMEM; |
3419 | int blocksize, clustersize; | 3425 | int blocksize, clustersize; |
3420 | unsigned int db_count; | 3426 | unsigned int db_count; |
3421 | unsigned int i; | 3427 | unsigned int i; |
3422 | int needs_recovery, has_huge_files, has_bigalloc; | 3428 | int needs_recovery, has_huge_files, has_bigalloc; |
3423 | __u64 blocks_count; | 3429 | __u64 blocks_count; |
3424 | int err = 0; | 3430 | int err = 0; |
3425 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 3431 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
3426 | ext4_group_t first_not_zeroed; | 3432 | ext4_group_t first_not_zeroed; |
3427 | 3433 | ||
3428 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 3434 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
3429 | if (!sbi) | 3435 | if (!sbi) |
3430 | goto out_free_orig; | 3436 | goto out_free_orig; |
3431 | 3437 | ||
3432 | sbi->s_blockgroup_lock = | 3438 | sbi->s_blockgroup_lock = |
3433 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); | 3439 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); |
3434 | if (!sbi->s_blockgroup_lock) { | 3440 | if (!sbi->s_blockgroup_lock) { |
3435 | kfree(sbi); | 3441 | kfree(sbi); |
3436 | goto out_free_orig; | 3442 | goto out_free_orig; |
3437 | } | 3443 | } |
3438 | sb->s_fs_info = sbi; | 3444 | sb->s_fs_info = sbi; |
3439 | sbi->s_sb = sb; | 3445 | sbi->s_sb = sb; |
3440 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | 3446 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; |
3441 | sbi->s_sb_block = sb_block; | 3447 | sbi->s_sb_block = sb_block; |
3442 | if (sb->s_bdev->bd_part) | 3448 | if (sb->s_bdev->bd_part) |
3443 | sbi->s_sectors_written_start = | 3449 | sbi->s_sectors_written_start = |
3444 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); | 3450 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); |
3445 | 3451 | ||
3446 | /* Cleanup superblock name */ | 3452 | /* Cleanup superblock name */ |
3447 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | 3453 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) |
3448 | *cp = '!'; | 3454 | *cp = '!'; |
3449 | 3455 | ||
3450 | /* -EINVAL is default */ | 3456 | /* -EINVAL is default */ |
3451 | ret = -EINVAL; | 3457 | ret = -EINVAL; |
3452 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 3458 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
3453 | if (!blocksize) { | 3459 | if (!blocksize) { |
3454 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); | 3460 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); |
3455 | goto out_fail; | 3461 | goto out_fail; |
3456 | } | 3462 | } |
3457 | 3463 | ||
3458 | /* | 3464 | /* |
3459 | * The ext4 superblock will not be aligned to the buffer start for | 3465 | * The ext4 superblock will not be aligned to the buffer start for |
3460 | * block sizes other than 1kB, so calculate the offset within the buffer. | 3466 | * block sizes other than 1kB, so calculate the offset within the buffer. |
3461 | */ | 3467 | */ |
3462 | if (blocksize != EXT4_MIN_BLOCK_SIZE) { | 3468 | if (blocksize != EXT4_MIN_BLOCK_SIZE) { |
3463 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; | 3469 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; |
3464 | offset = do_div(logical_sb_block, blocksize); | 3470 | offset = do_div(logical_sb_block, blocksize); |
3465 | } else { | 3471 | } else { |
3466 | logical_sb_block = sb_block; | 3472 | logical_sb_block = sb_block; |
3467 | } | 3473 | } |
3468 | 3474 | ||
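To make the branch above concrete: with the default sb_block of 1 (expressed in 1KiB units) and an assumed 4KiB minimum block size, the superblock's 1024-byte device offset falls inside logical block 0:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long sb_block = 1;    /* default, 1KiB units      */
            unsigned int blocksize = 4096;      /* assumed sb_min_blocksize */
            unsigned long long bytes = sb_block * 1024;  /* EXT4_MIN_BLOCK_SIZE */

            /* do_div() above returns the remainder and leaves the
             * quotient in place; plain / and % show the same split. */
            printf("logical_sb_block=%llu offset=%llu\n",
                   bytes / blocksize, bytes % blocksize);  /* 0 and 1024 */
            return 0;
    }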
3469 | if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { | 3475 | if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { |
3470 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); | 3476 | ext4_msg(sb, KERN_ERR, "unable to read superblock"); |
3471 | goto out_fail; | 3477 | goto out_fail; |
3472 | } | 3478 | } |
3473 | /* | 3479 | /* |
3474 | * Note: s_es must be initialized as soon as possible because | 3480 | * Note: s_es must be initialized as soon as possible because |
3475 | * some ext4 macros depend on its value | 3481 | * some ext4 macros depend on its value |
3476 | */ | 3482 | */ |
3477 | es = (struct ext4_super_block *) (bh->b_data + offset); | 3483 | es = (struct ext4_super_block *) (bh->b_data + offset); |
3478 | sbi->s_es = es; | 3484 | sbi->s_es = es; |
3479 | sb->s_magic = le16_to_cpu(es->s_magic); | 3485 | sb->s_magic = le16_to_cpu(es->s_magic); |
3480 | if (sb->s_magic != EXT4_SUPER_MAGIC) | 3486 | if (sb->s_magic != EXT4_SUPER_MAGIC) |
3481 | goto cantfind_ext4; | 3487 | goto cantfind_ext4; |
3482 | sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); | 3488 | sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); |
3483 | 3489 | ||
3484 | /* Warn if metadata_csum and gdt_csum are both set. */ | 3490 | /* Warn if metadata_csum and gdt_csum are both set. */ |
3485 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3491 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3486 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | 3492 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && |
3487 | EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | 3493 | EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) |
3488 | ext4_warning(sb, "metadata_csum and uninit_bg are " | 3494 | ext4_warning(sb, "metadata_csum and uninit_bg are " |
3489 | "redundant flags; please run fsck."); | 3495 | "redundant flags; please run fsck."); |
3490 | 3496 | ||
3491 | /* Check for a known checksum algorithm */ | 3497 | /* Check for a known checksum algorithm */ |
3492 | if (!ext4_verify_csum_type(sb, es)) { | 3498 | if (!ext4_verify_csum_type(sb, es)) { |
3493 | ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " | 3499 | ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " |
3494 | "unknown checksum algorithm."); | 3500 | "unknown checksum algorithm."); |
3495 | silent = 1; | 3501 | silent = 1; |
3496 | goto cantfind_ext4; | 3502 | goto cantfind_ext4; |
3497 | } | 3503 | } |
3498 | 3504 | ||
3499 | /* Load the checksum driver */ | 3505 | /* Load the checksum driver */ |
3500 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3506 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3501 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | 3507 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { |
3502 | sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); | 3508 | sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); |
3503 | if (IS_ERR(sbi->s_chksum_driver)) { | 3509 | if (IS_ERR(sbi->s_chksum_driver)) { |
3504 | ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); | 3510 | ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); |
3505 | ret = PTR_ERR(sbi->s_chksum_driver); | 3511 | ret = PTR_ERR(sbi->s_chksum_driver); |
3506 | sbi->s_chksum_driver = NULL; | 3512 | sbi->s_chksum_driver = NULL; |
3507 | goto failed_mount; | 3513 | goto failed_mount; |
3508 | } | 3514 | } |
3509 | } | 3515 | } |
3510 | 3516 | ||
3511 | /* Check superblock checksum */ | 3517 | /* Check superblock checksum */ |
3512 | if (!ext4_superblock_csum_verify(sb, es)) { | 3518 | if (!ext4_superblock_csum_verify(sb, es)) { |
3513 | ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " | 3519 | ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " |
3514 | "invalid superblock checksum. Run e2fsck?"); | 3520 | "invalid superblock checksum. Run e2fsck?"); |
3515 | silent = 1; | 3521 | silent = 1; |
3516 | goto cantfind_ext4; | 3522 | goto cantfind_ext4; |
3517 | } | 3523 | } |
3518 | 3524 | ||
3519 | /* Precompute checksum seed for all metadata */ | 3525 | /* Precompute checksum seed for all metadata */ |
3520 | if (ext4_has_metadata_csum(sb)) | 3526 | if (ext4_has_metadata_csum(sb)) |
3521 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, | 3527 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, |
3522 | sizeof(es->s_uuid)); | 3528 | sizeof(es->s_uuid)); |
3523 | 3529 | ||
3524 | /* Set defaults before we parse the mount options */ | 3530 | /* Set defaults before we parse the mount options */ |
3525 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 3531 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
3526 | set_opt(sb, INIT_INODE_TABLE); | 3532 | set_opt(sb, INIT_INODE_TABLE); |
3527 | if (def_mount_opts & EXT4_DEFM_DEBUG) | 3533 | if (def_mount_opts & EXT4_DEFM_DEBUG) |
3528 | set_opt(sb, DEBUG); | 3534 | set_opt(sb, DEBUG); |
3529 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) | 3535 | if (def_mount_opts & EXT4_DEFM_BSDGROUPS) |
3530 | set_opt(sb, GRPID); | 3536 | set_opt(sb, GRPID); |
3531 | if (def_mount_opts & EXT4_DEFM_UID16) | 3537 | if (def_mount_opts & EXT4_DEFM_UID16) |
3532 | set_opt(sb, NO_UID32); | 3538 | set_opt(sb, NO_UID32); |
3533 | /* xattr user namespace & acls are now defaulted on */ | 3539 | /* xattr user namespace & acls are now defaulted on */ |
3534 | set_opt(sb, XATTR_USER); | 3540 | set_opt(sb, XATTR_USER); |
3535 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3541 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3536 | set_opt(sb, POSIX_ACL); | 3542 | set_opt(sb, POSIX_ACL); |
3537 | #endif | 3543 | #endif |
3538 | /* don't forget to enable journal_csum when metadata_csum is enabled. */ | 3544 | /* don't forget to enable journal_csum when metadata_csum is enabled. */ |
3539 | if (ext4_has_metadata_csum(sb)) | 3545 | if (ext4_has_metadata_csum(sb)) |
3540 | set_opt(sb, JOURNAL_CHECKSUM); | 3546 | set_opt(sb, JOURNAL_CHECKSUM); |
3541 | 3547 | ||
3542 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3548 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
3543 | set_opt(sb, JOURNAL_DATA); | 3549 | set_opt(sb, JOURNAL_DATA); |
3544 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3550 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
3545 | set_opt(sb, ORDERED_DATA); | 3551 | set_opt(sb, ORDERED_DATA); |
3546 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) | 3552 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) |
3547 | set_opt(sb, WRITEBACK_DATA); | 3553 | set_opt(sb, WRITEBACK_DATA); |
3548 | 3554 | ||
3549 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) | 3555 | if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) |
3550 | set_opt(sb, ERRORS_PANIC); | 3556 | set_opt(sb, ERRORS_PANIC); |
3551 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) | 3557 | else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) |
3552 | set_opt(sb, ERRORS_CONT); | 3558 | set_opt(sb, ERRORS_CONT); |
3553 | else | 3559 | else |
3554 | set_opt(sb, ERRORS_RO); | 3560 | set_opt(sb, ERRORS_RO); |
3555 | /* block_validity enabled by default; disable with noblock_validity */ | 3561 | /* block_validity enabled by default; disable with noblock_validity */ |
3556 | set_opt(sb, BLOCK_VALIDITY); | 3562 | set_opt(sb, BLOCK_VALIDITY); |
3557 | if (def_mount_opts & EXT4_DEFM_DISCARD) | 3563 | if (def_mount_opts & EXT4_DEFM_DISCARD) |
3558 | set_opt(sb, DISCARD); | 3564 | set_opt(sb, DISCARD); |
3559 | 3565 | ||
3560 | sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); | 3566 | sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); |
3561 | sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); | 3567 | sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); |
3562 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; | 3568 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; |
3563 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | 3569 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; |
3564 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 3570 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
3565 | 3571 | ||
3566 | if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) | 3572 | if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) |
3567 | set_opt(sb, BARRIER); | 3573 | set_opt(sb, BARRIER); |
3568 | 3574 | ||
3569 | /* | 3575 | /* |
3570 | * Enable delayed allocation by default. | 3576 | * Enable delayed allocation by default. |
3571 | * Use -o nodelalloc to turn it off. | 3577 | * Use -o nodelalloc to turn it off. |
3572 | */ | 3578 | */ |
3573 | if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && | 3579 | if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) && |
3574 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | 3580 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) |
3575 | set_opt(sb, DELALLOC); | 3581 | set_opt(sb, DELALLOC); |
3576 | 3582 | ||
3577 | /* | 3583 | /* |
3578 | * Set the default s_li_wait_mult for lazyinit, in case no mount | 3584 | * Set the default s_li_wait_mult for lazyinit, in case no mount |
3579 | * option is specified. | 3585 | * option is specified. |
3580 | */ | 3586 | */ |
3581 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; | 3587 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; |
3582 | 3588 | ||
3583 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | 3589 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, |
3584 | &journal_devnum, &journal_ioprio, 0)) { | 3590 | &journal_devnum, &journal_ioprio, 0)) { |
3585 | ext4_msg(sb, KERN_WARNING, | 3591 | ext4_msg(sb, KERN_WARNING, |
3586 | "failed to parse options in superblock: %s", | 3592 | "failed to parse options in superblock: %s", |
3587 | sbi->s_es->s_mount_opts); | 3593 | sbi->s_es->s_mount_opts); |
3588 | } | 3594 | } |
3589 | sbi->s_def_mount_opt = sbi->s_mount_opt; | 3595 | sbi->s_def_mount_opt = sbi->s_mount_opt; |
3590 | if (!parse_options((char *) data, sb, &journal_devnum, | 3596 | if (!parse_options((char *) data, sb, &journal_devnum, |
3591 | &journal_ioprio, 0)) | 3597 | &journal_ioprio, 0)) |
3592 | goto failed_mount; | 3598 | goto failed_mount; |
3593 | 3599 | ||
3594 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 3600 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
3595 | printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " | 3601 | printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " |
3596 | "with data=journal disables delayed " | 3602 | "with data=journal disables delayed " |
3597 | "allocation and O_DIRECT support!\n"); | 3603 | "allocation and O_DIRECT support!\n"); |
3598 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | 3604 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
3599 | ext4_msg(sb, KERN_ERR, "can't mount with " | 3605 | ext4_msg(sb, KERN_ERR, "can't mount with " |
3600 | "both data=journal and delalloc"); | 3606 | "both data=journal and delalloc"); |
3601 | goto failed_mount; | 3607 | goto failed_mount; |
3602 | } | 3608 | } |
3603 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 3609 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
3604 | ext4_msg(sb, KERN_ERR, "can't mount with " | 3610 | ext4_msg(sb, KERN_ERR, "can't mount with " |
3605 | "both data=journal and dioread_nolock"); | 3611 | "both data=journal and dioread_nolock"); |
3606 | goto failed_mount; | 3612 | goto failed_mount; |
3607 | } | 3613 | } |
3608 | if (test_opt(sb, DAX)) { | 3614 | if (test_opt(sb, DAX)) { |
3609 | ext4_msg(sb, KERN_ERR, "can't mount with " | 3615 | ext4_msg(sb, KERN_ERR, "can't mount with " |
3610 | "both data=journal and dax"); | 3616 | "both data=journal and dax"); |
3611 | goto failed_mount; | 3617 | goto failed_mount; |
3612 | } | 3618 | } |
3613 | if (test_opt(sb, DELALLOC)) | 3619 | if (test_opt(sb, DELALLOC)) |
3614 | clear_opt(sb, DELALLOC); | 3620 | clear_opt(sb, DELALLOC); |
3615 | } | 3621 | } |
3616 | 3622 | ||
3617 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3623 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
3618 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 3624 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
3619 | 3625 | ||
3620 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && | 3626 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && |
3621 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || | 3627 | (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || |
3622 | EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || | 3628 | EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || |
3623 | EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) | 3629 | EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) |
3624 | ext4_msg(sb, KERN_WARNING, | 3630 | ext4_msg(sb, KERN_WARNING, |
3625 | "feature flags set on rev 0 fs, " | 3631 | "feature flags set on rev 0 fs, " |
3626 | "running e2fsck is recommended"); | 3632 | "running e2fsck is recommended"); |
3627 | 3633 | ||
3628 | if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { | 3634 | if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { |
3629 | set_opt2(sb, HURD_COMPAT); | 3635 | set_opt2(sb, HURD_COMPAT); |
3630 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | 3636 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, |
3631 | EXT4_FEATURE_INCOMPAT_64BIT)) { | 3637 | EXT4_FEATURE_INCOMPAT_64BIT)) { |
3632 | ext4_msg(sb, KERN_ERR, | 3638 | ext4_msg(sb, KERN_ERR, |
3633 | "The Hurd can't support 64-bit file systems"); | 3639 | "The Hurd can't support 64-bit file systems"); |
3634 | goto failed_mount; | 3640 | goto failed_mount; |
3635 | } | 3641 | } |
3636 | } | 3642 | } |
3637 | 3643 | ||
3638 | if (IS_EXT2_SB(sb)) { | 3644 | if (IS_EXT2_SB(sb)) { |
3639 | if (ext2_feature_set_ok(sb)) | 3645 | if (ext2_feature_set_ok(sb)) |
3640 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " | 3646 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " |
3641 | "using the ext4 subsystem"); | 3647 | "using the ext4 subsystem"); |
3642 | else { | 3648 | else { |
3643 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " | 3649 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " |
3644 | "to feature incompatibilities"); | 3650 | "to feature incompatibilities"); |
3645 | goto failed_mount; | 3651 | goto failed_mount; |
3646 | } | 3652 | } |
3647 | } | 3653 | } |
3648 | 3654 | ||
3649 | if (IS_EXT3_SB(sb)) { | 3655 | if (IS_EXT3_SB(sb)) { |
3650 | if (ext3_feature_set_ok(sb)) | 3656 | if (ext3_feature_set_ok(sb)) |
3651 | ext4_msg(sb, KERN_INFO, "mounting ext3 file system " | 3657 | ext4_msg(sb, KERN_INFO, "mounting ext3 file system " |
3652 | "using the ext4 subsystem"); | 3658 | "using the ext4 subsystem"); |
3653 | else { | 3659 | else { |
3654 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " | 3660 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " |
3655 | "to feature incompatibilities"); | 3661 | "to feature incompatibilities"); |
3656 | goto failed_mount; | 3662 | goto failed_mount; |
3657 | } | 3663 | } |
3658 | } | 3664 | } |
3659 | 3665 | ||
3660 | /* | 3666 | /* |
3661 | * Check feature flags regardless of the revision level, since we | 3667 | * Check feature flags regardless of the revision level, since we |
3662 | * previously didn't change the revision level when setting the flags, | 3668 | * previously didn't change the revision level when setting the flags, |
3663 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 3669 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
3664 | */ | 3670 | */ |
3665 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) | 3671 | if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) |
3666 | goto failed_mount; | 3672 | goto failed_mount; |
3667 | 3673 | ||
3668 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); | 3674 | blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); |
3669 | if (blocksize < EXT4_MIN_BLOCK_SIZE || | 3675 | if (blocksize < EXT4_MIN_BLOCK_SIZE || |
3670 | blocksize > EXT4_MAX_BLOCK_SIZE) { | 3676 | blocksize > EXT4_MAX_BLOCK_SIZE) { |
3671 | ext4_msg(sb, KERN_ERR, | 3677 | ext4_msg(sb, KERN_ERR, |
3672 | "Unsupported filesystem blocksize %d", blocksize); | 3678 | "Unsupported filesystem blocksize %d", blocksize); |
3673 | goto failed_mount; | 3679 | goto failed_mount; |
3674 | } | 3680 | } |
3675 | 3681 | ||
3676 | if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { | 3682 | if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { |
3677 | if (blocksize != PAGE_SIZE) { | 3683 | if (blocksize != PAGE_SIZE) { |
3678 | ext4_msg(sb, KERN_ERR, | 3684 | ext4_msg(sb, KERN_ERR, |
3679 | "error: unsupported blocksize for dax"); | 3685 | "error: unsupported blocksize for dax"); |
3680 | goto failed_mount; | 3686 | goto failed_mount; |
3681 | } | 3687 | } |
3682 | if (!sb->s_bdev->bd_disk->fops->direct_access) { | 3688 | if (!sb->s_bdev->bd_disk->fops->direct_access) { |
3683 | ext4_msg(sb, KERN_ERR, | 3689 | ext4_msg(sb, KERN_ERR, |
3684 | "error: device does not support dax"); | 3690 | "error: device does not support dax"); |
3685 | goto failed_mount; | 3691 | goto failed_mount; |
3686 | } | 3692 | } |
3687 | } | 3693 | } |
3688 | 3694 | ||
3689 | if (sb->s_blocksize != blocksize) { | 3695 | if (sb->s_blocksize != blocksize) { |
3690 | /* Validate the filesystem blocksize */ | 3696 | /* Validate the filesystem blocksize */ |
3691 | if (!sb_set_blocksize(sb, blocksize)) { | 3697 | if (!sb_set_blocksize(sb, blocksize)) { |
3692 | ext4_msg(sb, KERN_ERR, "bad block size %d", | 3698 | ext4_msg(sb, KERN_ERR, "bad block size %d", |
3693 | blocksize); | 3699 | blocksize); |
3694 | goto failed_mount; | 3700 | goto failed_mount; |
3695 | } | 3701 | } |
3696 | 3702 | ||
3697 | brelse(bh); | 3703 | brelse(bh); |
3698 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; | 3704 | logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; |
3699 | offset = do_div(logical_sb_block, blocksize); | 3705 | offset = do_div(logical_sb_block, blocksize); |
3700 | bh = sb_bread_unmovable(sb, logical_sb_block); | 3706 | bh = sb_bread_unmovable(sb, logical_sb_block); |
3701 | if (!bh) { | 3707 | if (!bh) { |
3702 | ext4_msg(sb, KERN_ERR, | 3708 | ext4_msg(sb, KERN_ERR, |
3703 | "Can't read superblock on 2nd try"); | 3709 | "Can't read superblock on 2nd try"); |
3704 | goto failed_mount; | 3710 | goto failed_mount; |
3705 | } | 3711 | } |
3706 | es = (struct ext4_super_block *)(bh->b_data + offset); | 3712 | es = (struct ext4_super_block *)(bh->b_data + offset); |
3707 | sbi->s_es = es; | 3713 | sbi->s_es = es; |
3708 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { | 3714 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { |
3709 | ext4_msg(sb, KERN_ERR, | 3715 | ext4_msg(sb, KERN_ERR, |
3710 | "Magic mismatch, very weird!"); | 3716 | "Magic mismatch, very weird!"); |
3711 | goto failed_mount; | 3717 | goto failed_mount; |
3712 | } | 3718 | } |
3713 | } | 3719 | } |
3714 | 3720 | ||
3715 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3721 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3716 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | 3722 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); |
3717 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, | 3723 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
3718 | has_huge_files); | 3724 | has_huge_files); |
3719 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | 3725 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); |
3720 | 3726 | ||
3721 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { | 3727 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { |
3722 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; | 3728 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; |
3723 | sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; | 3729 | sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; |
3724 | } else { | 3730 | } else { |
3725 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); | 3731 | sbi->s_inode_size = le16_to_cpu(es->s_inode_size); |
3726 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); | 3732 | sbi->s_first_ino = le32_to_cpu(es->s_first_ino); |
3727 | if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || | 3733 | if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || |
3728 | (!is_power_of_2(sbi->s_inode_size)) || | 3734 | (!is_power_of_2(sbi->s_inode_size)) || |
3729 | (sbi->s_inode_size > blocksize)) { | 3735 | (sbi->s_inode_size > blocksize)) { |
3730 | ext4_msg(sb, KERN_ERR, | 3736 | ext4_msg(sb, KERN_ERR, |
3731 | "unsupported inode size: %d", | 3737 | "unsupported inode size: %d", |
3732 | sbi->s_inode_size); | 3738 | sbi->s_inode_size); |
3733 | goto failed_mount; | 3739 | goto failed_mount; |
3734 | } | 3740 | } |
3735 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) | 3741 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) |
3736 | sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); | 3742 | sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); |
3737 | } | 3743 | } |
3738 | 3744 | ||
3739 | sbi->s_desc_size = le16_to_cpu(es->s_desc_size); | 3745 | sbi->s_desc_size = le16_to_cpu(es->s_desc_size); |
3740 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { | 3746 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { |
3741 | if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || | 3747 | if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || |
3742 | sbi->s_desc_size > EXT4_MAX_DESC_SIZE || | 3748 | sbi->s_desc_size > EXT4_MAX_DESC_SIZE || |
3743 | !is_power_of_2(sbi->s_desc_size)) { | 3749 | !is_power_of_2(sbi->s_desc_size)) { |
3744 | ext4_msg(sb, KERN_ERR, | 3750 | ext4_msg(sb, KERN_ERR, |
3745 | "unsupported descriptor size %lu", | 3751 | "unsupported descriptor size %lu", |
3746 | sbi->s_desc_size); | 3752 | sbi->s_desc_size); |
3747 | goto failed_mount; | 3753 | goto failed_mount; |
3748 | } | 3754 | } |
3749 | } else | 3755 | } else |
3750 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; | 3756 | sbi->s_desc_size = EXT4_MIN_DESC_SIZE; |
3751 | 3757 | ||
3752 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); | 3758 | sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); |
3753 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); | 3759 | sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); |
3754 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) | 3760 | if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) |
3755 | goto cantfind_ext4; | 3761 | goto cantfind_ext4; |
3756 | 3762 | ||
3757 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); | 3763 | sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); |
3758 | if (sbi->s_inodes_per_block == 0) | 3764 | if (sbi->s_inodes_per_block == 0) |
3759 | goto cantfind_ext4; | 3765 | goto cantfind_ext4; |
3760 | sbi->s_itb_per_group = sbi->s_inodes_per_group / | 3766 | sbi->s_itb_per_group = sbi->s_inodes_per_group / |
3761 | sbi->s_inodes_per_block; | 3767 | sbi->s_inodes_per_block; |
3762 | sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); | 3768 | sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); |
3763 | sbi->s_sbh = bh; | 3769 | sbi->s_sbh = bh; |
3764 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 3770 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
3765 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); | 3771 | sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); |
3766 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); | 3772 | sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); |
3767 | 3773 | ||
3768 | for (i = 0; i < 4; i++) | 3774 | for (i = 0; i < 4; i++) |
3769 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); | 3775 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); |
3770 | sbi->s_def_hash_version = es->s_def_hash_version; | 3776 | sbi->s_def_hash_version = es->s_def_hash_version; |
3771 | if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { | 3777 | if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { |
3772 | i = le32_to_cpu(es->s_flags); | 3778 | i = le32_to_cpu(es->s_flags); |
3773 | if (i & EXT2_FLAGS_UNSIGNED_HASH) | 3779 | if (i & EXT2_FLAGS_UNSIGNED_HASH) |
3774 | sbi->s_hash_unsigned = 3; | 3780 | sbi->s_hash_unsigned = 3; |
3775 | else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { | 3781 | else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { |
3776 | #ifdef __CHAR_UNSIGNED__ | 3782 | #ifdef __CHAR_UNSIGNED__ |
3777 | if (!(sb->s_flags & MS_RDONLY)) | 3783 | if (!(sb->s_flags & MS_RDONLY)) |
3778 | es->s_flags |= | 3784 | es->s_flags |= |
3779 | cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); | 3785 | cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); |
3780 | sbi->s_hash_unsigned = 3; | 3786 | sbi->s_hash_unsigned = 3; |
3781 | #else | 3787 | #else |
3782 | if (!(sb->s_flags & MS_RDONLY)) | 3788 | if (!(sb->s_flags & MS_RDONLY)) |
3783 | es->s_flags |= | 3789 | es->s_flags |= |
3784 | cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); | 3790 | cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); |
3785 | #endif | 3791 | #endif |
3786 | } | 3792 | } |
3787 | } | 3793 | } |
3788 | 3794 | ||
3789 | /* Handle clustersize */ | 3795 | /* Handle clustersize */ |
3790 | clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); | 3796 | clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); |
3791 | has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3797 | has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3792 | EXT4_FEATURE_RO_COMPAT_BIGALLOC); | 3798 | EXT4_FEATURE_RO_COMPAT_BIGALLOC); |
3793 | if (has_bigalloc) { | 3799 | if (has_bigalloc) { |
3794 | if (clustersize < blocksize) { | 3800 | if (clustersize < blocksize) { |
3795 | ext4_msg(sb, KERN_ERR, | 3801 | ext4_msg(sb, KERN_ERR, |
3796 | "cluster size (%d) smaller than " | 3802 | "cluster size (%d) smaller than " |
3797 | "block size (%d)", clustersize, blocksize); | 3803 | "block size (%d)", clustersize, blocksize); |
3798 | goto failed_mount; | 3804 | goto failed_mount; |
3799 | } | 3805 | } |
3800 | sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - | 3806 | sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - |
3801 | le32_to_cpu(es->s_log_block_size); | 3807 | le32_to_cpu(es->s_log_block_size); |
3802 | sbi->s_clusters_per_group = | 3808 | sbi->s_clusters_per_group = |
3803 | le32_to_cpu(es->s_clusters_per_group); | 3809 | le32_to_cpu(es->s_clusters_per_group); |
3804 | if (sbi->s_clusters_per_group > blocksize * 8) { | 3810 | if (sbi->s_clusters_per_group > blocksize * 8) { |
3805 | ext4_msg(sb, KERN_ERR, | 3811 | ext4_msg(sb, KERN_ERR, |
3806 | "#clusters per group too big: %lu", | 3812 | "#clusters per group too big: %lu", |
3807 | sbi->s_clusters_per_group); | 3813 | sbi->s_clusters_per_group); |
3808 | goto failed_mount; | 3814 | goto failed_mount; |
3809 | } | 3815 | } |
3810 | if (sbi->s_blocks_per_group != | 3816 | if (sbi->s_blocks_per_group != |
3811 | (sbi->s_clusters_per_group * (clustersize / blocksize))) { | 3817 | (sbi->s_clusters_per_group * (clustersize / blocksize))) { |
3812 | ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " | 3818 | ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " |
3813 | "clusters per group (%lu) inconsistent", | 3819 | "clusters per group (%lu) inconsistent", |
3814 | sbi->s_blocks_per_group, | 3820 | sbi->s_blocks_per_group, |
3815 | sbi->s_clusters_per_group); | 3821 | sbi->s_clusters_per_group); |
3816 | goto failed_mount; | 3822 | goto failed_mount; |
3817 | } | 3823 | } |
3818 | } else { | 3824 | } else { |
3819 | if (clustersize != blocksize) { | 3825 | if (clustersize != blocksize) { |
3820 | ext4_warning(sb, "fragment/cluster size (%d) != " | 3826 | ext4_warning(sb, "fragment/cluster size (%d) != " |
3821 | "block size (%d)", clustersize, | 3827 | "block size (%d)", clustersize, |
3822 | blocksize); | 3828 | blocksize); |
3823 | clustersize = blocksize; | 3829 | clustersize = blocksize; |
3824 | } | 3830 | } |
3825 | if (sbi->s_blocks_per_group > blocksize * 8) { | 3831 | if (sbi->s_blocks_per_group > blocksize * 8) { |
3826 | ext4_msg(sb, KERN_ERR, | 3832 | ext4_msg(sb, KERN_ERR, |
3827 | "#blocks per group too big: %lu", | 3833 | "#blocks per group too big: %lu", |
3828 | sbi->s_blocks_per_group); | 3834 | sbi->s_blocks_per_group); |
3829 | goto failed_mount; | 3835 | goto failed_mount; |
3830 | } | 3836 | } |
3831 | sbi->s_clusters_per_group = sbi->s_blocks_per_group; | 3837 | sbi->s_clusters_per_group = sbi->s_blocks_per_group; |
3832 | sbi->s_cluster_bits = 0; | 3838 | sbi->s_cluster_bits = 0; |
3833 | } | 3839 | } |
3834 | sbi->s_cluster_ratio = clustersize / blocksize; | 3840 | sbi->s_cluster_ratio = clustersize / blocksize; |
3835 | 3841 | ||
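The bigalloc branch derives s_cluster_bits as the difference of the two on-disk log fields; a tiny sketch with assumed example values (4KiB blocks, 64KiB clusters):

    #include <stdio.h>

    int main(void)
    {
            unsigned log_block_size = 2;    /* 1024 << 2 = 4096  (4KiB)  */
            unsigned log_cluster_size = 6;  /* 1024 << 6 = 65536 (64KiB) */
            unsigned cluster_bits = log_cluster_size - log_block_size;

            /* Agrees with s_cluster_ratio = clustersize / blocksize. */
            printf("cluster_bits=%u ratio=%u\n",
                   cluster_bits, 1u << cluster_bits);  /* 4 and 16 */
            return 0;
    }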
3836 | if (sbi->s_inodes_per_group > blocksize * 8) { | 3842 | if (sbi->s_inodes_per_group > blocksize * 8) { |
3837 | ext4_msg(sb, KERN_ERR, | 3843 | ext4_msg(sb, KERN_ERR, |
3838 | "#inodes per group too big: %lu", | 3844 | "#inodes per group too big: %lu", |
3839 | sbi->s_inodes_per_group); | 3845 | sbi->s_inodes_per_group); |
3840 | goto failed_mount; | 3846 | goto failed_mount; |
3841 | } | 3847 | } |
3842 | 3848 | ||
3849 | /* Do we have the standard group size of clustersize * 8 blocks? */ | 3855 | /* Do we have the standard group size of clustersize * 8 blocks? */ |
3844 | if (sbi->s_blocks_per_group == clustersize << 3) | 3850 | if (sbi->s_blocks_per_group == clustersize << 3) |
3845 | set_opt2(sb, STD_GROUP_SIZE); | 3851 | set_opt2(sb, STD_GROUP_SIZE); |
3846 | 3852 | ||
3847 | /* | 3853 | /* |
3848 | * Test whether we have more sectors than will fit in sector_t, | 3854 | * Test whether we have more sectors than will fit in sector_t, |
3849 | * and whether the max offset is addressable by the page cache. | 3855 | * and whether the max offset is addressable by the page cache. |
3850 | */ | 3856 | */ |
3851 | err = generic_check_addressable(sb->s_blocksize_bits, | 3857 | err = generic_check_addressable(sb->s_blocksize_bits, |
3852 | ext4_blocks_count(es)); | 3858 | ext4_blocks_count(es)); |
3853 | if (err) { | 3859 | if (err) { |
3854 | ext4_msg(sb, KERN_ERR, "filesystem" | 3860 | ext4_msg(sb, KERN_ERR, "filesystem" |
3855 | " too large to mount safely on this system"); | 3861 | " too large to mount safely on this system"); |
3856 | if (sizeof(sector_t) < 8) | 3862 | if (sizeof(sector_t) < 8) |
3857 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 3863 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
3858 | goto failed_mount; | 3864 | goto failed_mount; |
3859 | } | 3865 | } |
3860 | 3866 | ||
3861 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) | 3867 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) |
3862 | goto cantfind_ext4; | 3868 | goto cantfind_ext4; |
3863 | 3869 | ||
3864 | /* check blocks count against device size */ | 3870 | /* check blocks count against device size */ |
3865 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; | 3871 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; |
3866 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { | 3872 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { |
3867 | ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " | 3873 | ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " |
3868 | "exceeds size of device (%llu blocks)", | 3874 | "exceeds size of device (%llu blocks)", |
3869 | ext4_blocks_count(es), blocks_count); | 3875 | ext4_blocks_count(es), blocks_count); |
3870 | goto failed_mount; | 3876 | goto failed_mount; |
3871 | } | 3877 | } |
3872 | 3878 | ||
3873 | /* | 3879 | /* |
3874 | * It makes no sense for the first data block to be beyond the end | 3880 | * It makes no sense for the first data block to be beyond the end |
3875 | * of the filesystem. | 3881 | * of the filesystem. |
3876 | */ | 3882 | */ |
3877 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { | 3883 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { |
3878 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data " | 3884 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data " |
3879 | "block %u is beyond end of filesystem (%llu)", | 3885 | "block %u is beyond end of filesystem (%llu)", |
3880 | le32_to_cpu(es->s_first_data_block), | 3886 | le32_to_cpu(es->s_first_data_block), |
3881 | ext4_blocks_count(es)); | 3887 | ext4_blocks_count(es)); |
3882 | goto failed_mount; | 3888 | goto failed_mount; |
3883 | } | 3889 | } |
3884 | blocks_count = (ext4_blocks_count(es) - | 3890 | blocks_count = (ext4_blocks_count(es) - |
3885 | le32_to_cpu(es->s_first_data_block) + | 3891 | le32_to_cpu(es->s_first_data_block) + |
3886 | EXT4_BLOCKS_PER_GROUP(sb) - 1); | 3892 | EXT4_BLOCKS_PER_GROUP(sb) - 1); |
3887 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); | 3893 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); |
3888 | if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { | 3894 | if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { |
3889 | ext4_msg(sb, KERN_WARNING, "groups count too large: %u " | 3895 | ext4_msg(sb, KERN_WARNING, "groups count too large: %u " |
3890 | "(block count %llu, first data block %u, " | 3896 | "(block count %llu, first data block %u, " |
3891 | "blocks per group %lu)", sbi->s_groups_count, | 3897 | "blocks per group %lu)", sbi->s_groups_count, |
3892 | ext4_blocks_count(es), | 3898 | ext4_blocks_count(es), |
3893 | le32_to_cpu(es->s_first_data_block), | 3899 | le32_to_cpu(es->s_first_data_block), |
3894 | EXT4_BLOCKS_PER_GROUP(sb)); | 3900 | EXT4_BLOCKS_PER_GROUP(sb)); |
3895 | goto failed_mount; | 3901 | goto failed_mount; |
3896 | } | 3902 | } |
3897 | sbi->s_groups_count = blocks_count; | 3903 | sbi->s_groups_count = blocks_count; |
3898 | sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, | 3904 | sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, |
3899 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); | 3905 | (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); |
3900 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | 3906 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / |
3901 | EXT4_DESC_PER_BLOCK(sb); | 3907 | EXT4_DESC_PER_BLOCK(sb); |
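The two round-up divisions above (groups count, then descriptor-block count), worked with an assumed geometry of an 8GiB filesystem, 4KiB blocks, and 32-byte group descriptors:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t blocks = 2097152;        /* 8GiB / 4KiB, assumed        */
            uint64_t first_data_block = 0;    /* 0 whenever blocksize > 1KiB */
            uint64_t per_group = 32768;       /* 8 * 4096 blocks per group   */
            uint64_t desc_per_block = 128;    /* 4096 / 32 bytes             */

            /* Same round-up idiom as the do_div() sequence above. */
            uint64_t groups = (blocks - first_data_block + per_group - 1)
                              / per_group;
            uint64_t db_count = (groups + desc_per_block - 1) / desc_per_block;

            printf("groups=%llu db_count=%llu\n",   /* 64 and 1 */
                   (unsigned long long)groups, (unsigned long long)db_count);
            return 0;
    }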
3902 | sbi->s_group_desc = ext4_kvmalloc(db_count * | 3908 | sbi->s_group_desc = ext4_kvmalloc(db_count * |
3903 | sizeof(struct buffer_head *), | 3909 | sizeof(struct buffer_head *), |
3904 | GFP_KERNEL); | 3910 | GFP_KERNEL); |
3905 | if (sbi->s_group_desc == NULL) { | 3911 | if (sbi->s_group_desc == NULL) { |
3906 | ext4_msg(sb, KERN_ERR, "not enough memory"); | 3912 | ext4_msg(sb, KERN_ERR, "not enough memory"); |
3907 | ret = -ENOMEM; | 3913 | ret = -ENOMEM; |
3908 | goto failed_mount; | 3914 | goto failed_mount; |
3909 | } | 3915 | } |
3910 | 3916 | ||
3911 | if (ext4_proc_root) | 3917 | if (ext4_proc_root) |
3912 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | 3918 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); |
3913 | 3919 | ||
3914 | if (sbi->s_proc) | 3920 | if (sbi->s_proc) |
3915 | proc_create_data("options", S_IRUGO, sbi->s_proc, | 3921 | proc_create_data("options", S_IRUGO, sbi->s_proc, |
3916 | &ext4_seq_options_fops, sb); | 3922 | &ext4_seq_options_fops, sb); |
3917 | 3923 | ||
3918 | bgl_lock_init(sbi->s_blockgroup_lock); | 3924 | bgl_lock_init(sbi->s_blockgroup_lock); |
3919 | 3925 | ||
3920 | for (i = 0; i < db_count; i++) { | 3926 | for (i = 0; i < db_count; i++) { |
3921 | block = descriptor_loc(sb, logical_sb_block, i); | 3927 | block = descriptor_loc(sb, logical_sb_block, i); |
3922 | sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); | 3928 | sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); |
3923 | if (!sbi->s_group_desc[i]) { | 3929 | if (!sbi->s_group_desc[i]) { |
3924 | ext4_msg(sb, KERN_ERR, | 3930 | ext4_msg(sb, KERN_ERR, |
3925 | "can't read group descriptor %d", i); | 3931 | "can't read group descriptor %d", i); |
3926 | db_count = i; | 3932 | db_count = i; |
3927 | goto failed_mount2; | 3933 | goto failed_mount2; |
3928 | } | 3934 | } |
3929 | } | 3935 | } |
3930 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { | 3936 | if (!ext4_check_descriptors(sb, &first_not_zeroed)) { |
3931 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); | 3937 | ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); |
3932 | goto failed_mount2; | 3938 | goto failed_mount2; |
3933 | } | 3939 | } |
3934 | 3940 | ||
3935 | sbi->s_gdb_count = db_count; | 3941 | sbi->s_gdb_count = db_count; |
3936 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 3942 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
3937 | spin_lock_init(&sbi->s_next_gen_lock); | 3943 | spin_lock_init(&sbi->s_next_gen_lock); |
3938 | 3944 | ||
3939 | init_timer(&sbi->s_err_report); | 3945 | setup_timer(&sbi->s_err_report, print_daily_error_info, |
3940 | sbi->s_err_report.function = print_daily_error_info; | 3946 | (unsigned long) sb); |
3941 | sbi->s_err_report.data = (unsigned long) sb; | ||
3942 | 3947 | ||
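This hunk is the setup_timer() conversion called out in the pull request: the three-step init_timer() sequence in the left column collapses into a single call in the right, with identical effect. Side by side, as a sketch rather than standalone code:

    /* before */
    init_timer(&sbi->s_err_report);
    sbi->s_err_report.function = print_daily_error_info;
    sbi->s_err_report.data = (unsigned long) sb;

    /* after: one call initializing the same three fields */
    setup_timer(&sbi->s_err_report, print_daily_error_info,
                (unsigned long) sb);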
3943 | /* Register extent status tree shrinker */ | 3948 | /* Register extent status tree shrinker */ |
3944 | if (ext4_es_register_shrinker(sbi)) | 3949 | if (ext4_es_register_shrinker(sbi)) |
3945 | goto failed_mount3; | 3950 | goto failed_mount3; |
3946 | 3951 | ||
3947 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3952 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3948 | sbi->s_extent_max_zeroout_kb = 32; | 3953 | sbi->s_extent_max_zeroout_kb = 32; |
3949 | 3954 | ||
3950 | /* | 3955 | /* |
3951 | * Set up enough so that we can read an inode. | 3957 | * Set up enough so that we can read an inode. |
3952 | */ | 3957 | */ |
3953 | sb->s_op = &ext4_sops; | 3958 | sb->s_op = &ext4_sops; |
3954 | sb->s_export_op = &ext4_export_ops; | 3959 | sb->s_export_op = &ext4_export_ops; |
3955 | sb->s_xattr = ext4_xattr_handlers; | 3960 | sb->s_xattr = ext4_xattr_handlers; |
3956 | #ifdef CONFIG_QUOTA | 3961 | #ifdef CONFIG_QUOTA |
3957 | sb->dq_op = &ext4_quota_operations; | 3962 | sb->dq_op = &ext4_quota_operations; |
3958 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) | 3963 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) |
3959 | sb->s_qcop = &dquot_quotactl_sysfile_ops; | 3964 | sb->s_qcop = &dquot_quotactl_sysfile_ops; |
3960 | else | 3965 | else |
3961 | sb->s_qcop = &ext4_qctl_operations; | 3966 | sb->s_qcop = &ext4_qctl_operations; |
3962 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; | 3967 | sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; |
3963 | #endif | 3968 | #endif |
3964 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); | 3969 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); |
3965 | 3970 | ||
3966 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 3971 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
3967 | mutex_init(&sbi->s_orphan_lock); | 3972 | mutex_init(&sbi->s_orphan_lock); |
3968 | 3973 | ||
3969 | sb->s_root = NULL; | 3974 | sb->s_root = NULL; |
3970 | 3975 | ||
3971 | needs_recovery = (es->s_last_orphan != 0 || | 3976 | needs_recovery = (es->s_last_orphan != 0 || |
3972 | EXT4_HAS_INCOMPAT_FEATURE(sb, | 3977 | EXT4_HAS_INCOMPAT_FEATURE(sb, |
3973 | EXT4_FEATURE_INCOMPAT_RECOVER)); | 3978 | EXT4_FEATURE_INCOMPAT_RECOVER)); |
3974 | 3979 | ||
3975 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && | 3980 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && |
3976 | !(sb->s_flags & MS_RDONLY)) | 3981 | !(sb->s_flags & MS_RDONLY)) |
3977 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) | 3982 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) |
3978 | goto failed_mount3a; | 3983 | goto failed_mount3a; |
3979 | 3984 | ||
3980 | /* | 3985 | /* |
3981 | * The first inode we look at is the journal inode. Don't try | 3986 | * The first inode we look at is the journal inode. Don't try |
3982 | * root first: it may be modified in the journal! | 3987 | * root first: it may be modified in the journal! |
3983 | */ | 3988 | */ |
3984 | if (!test_opt(sb, NOLOAD) && | 3989 | if (!test_opt(sb, NOLOAD) && |
3985 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { | 3990 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { |
3986 | if (ext4_load_journal(sb, es, journal_devnum)) | 3991 | if (ext4_load_journal(sb, es, journal_devnum)) |
3987 | goto failed_mount3a; | 3992 | goto failed_mount3a; |
3988 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && | 3993 | } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && |
3989 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 3994 | EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
3990 | ext4_msg(sb, KERN_ERR, "required journal recovery " | 3995 | ext4_msg(sb, KERN_ERR, "required journal recovery " |
3991 | "suppressed and not mounted read-only"); | 3996 | "suppressed and not mounted read-only"); |
3992 | goto failed_mount_wq; | 3997 | goto failed_mount_wq; |
3993 | } else { | 3998 | } else { |
3994 | clear_opt(sb, DATA_FLAGS); | 3999 | clear_opt(sb, DATA_FLAGS); |
3995 | sbi->s_journal = NULL; | 4000 | sbi->s_journal = NULL; |
3996 | needs_recovery = 0; | 4001 | needs_recovery = 0; |
3997 | goto no_journal; | 4002 | goto no_journal; |
3998 | } | 4003 | } |
3999 | 4004 | ||
4000 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && | 4005 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && |
4001 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, | 4006 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, |
4002 | JBD2_FEATURE_INCOMPAT_64BIT)) { | 4007 | JBD2_FEATURE_INCOMPAT_64BIT)) { |
4003 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); | 4008 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); |
4004 | goto failed_mount_wq; | 4009 | goto failed_mount_wq; |
4005 | } | 4010 | } |
4006 | 4011 | ||
4007 | if (!set_journal_csum_feature_set(sb)) { | 4012 | if (!set_journal_csum_feature_set(sb)) { |
4008 | ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " | 4013 | ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " |
4009 | "feature set"); | 4014 | "feature set"); |
4010 | goto failed_mount_wq; | 4015 | goto failed_mount_wq; |
4011 | } | 4016 | } |
4012 | 4017 | ||
4013 | /* We have now updated the journal if required, so we can | 4018 | /* We have now updated the journal if required, so we can |
4014 | * validate the data journaling mode. */ | 4019 | * validate the data journaling mode. */ |
4015 | switch (test_opt(sb, DATA_FLAGS)) { | 4020 | switch (test_opt(sb, DATA_FLAGS)) { |
4016 | case 0: | 4021 | case 0: |
4017 | /* No mode set, assume a default based on the journal | 4022 | /* No mode set, assume a default based on the journal |
4018 | * capabilities: ORDERED_DATA if the journal can | 4023 | * capabilities: ORDERED_DATA if the journal can |
4019 | * cope, else JOURNAL_DATA | 4024 | * cope, else JOURNAL_DATA |
4020 | */ | 4025 | */ |
4021 | if (jbd2_journal_check_available_features | 4026 | if (jbd2_journal_check_available_features |
4022 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) | 4027 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) |
4023 | set_opt(sb, ORDERED_DATA); | 4028 | set_opt(sb, ORDERED_DATA); |
4024 | else | 4029 | else |
4025 | set_opt(sb, JOURNAL_DATA); | 4030 | set_opt(sb, JOURNAL_DATA); |
4026 | break; | 4031 | break; |
4027 | 4032 | ||
4028 | case EXT4_MOUNT_ORDERED_DATA: | 4033 | case EXT4_MOUNT_ORDERED_DATA: |
4029 | case EXT4_MOUNT_WRITEBACK_DATA: | 4034 | case EXT4_MOUNT_WRITEBACK_DATA: |
4030 | if (!jbd2_journal_check_available_features | 4035 | if (!jbd2_journal_check_available_features |
4031 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { | 4036 | (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { |
4032 | ext4_msg(sb, KERN_ERR, "Journal does not support " | 4037 | ext4_msg(sb, KERN_ERR, "Journal does not support " |
4033 | "requested data journaling mode"); | 4038 | "requested data journaling mode"); |
4034 | goto failed_mount_wq; | 4039 | goto failed_mount_wq; |
4035 | } | 4040 | } |
4036 | default: | 4041 | default: |
4037 | break; | 4042 | break; |
4038 | } | 4043 | } |
4039 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 4044 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
4040 | 4045 | ||
4041 | sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; | 4046 | sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; |
4042 | 4047 | ||
4043 | no_journal: | 4048 | no_journal: |
4044 | if (ext4_mballoc_ready) { | 4049 | if (ext4_mballoc_ready) { |
4045 | sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); | 4050 | sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); |
4046 | if (!sbi->s_mb_cache) { | 4051 | if (!sbi->s_mb_cache) { |
4047 | ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); | 4052 | ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); |
4048 | goto failed_mount_wq; | 4053 | goto failed_mount_wq; |
4049 | } | 4054 | } |
4050 | } | 4055 | } |
4051 | 4056 | ||
4052 | /* | 4057 | /* |
4053 | * Get the # of file system overhead blocks from the | 4058 | * Get the # of file system overhead blocks from the |
4054 | * superblock if present. | 4059 | * superblock if present. |
4055 | */ | 4060 | */ |
4056 | if (es->s_overhead_clusters) | 4061 | if (es->s_overhead_clusters) |
4057 | sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); | 4062 | sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); |
4058 | else { | 4063 | else { |
4059 | err = ext4_calculate_overhead(sb); | 4064 | err = ext4_calculate_overhead(sb); |
4060 | if (err) | 4065 | if (err) |
4061 | goto failed_mount_wq; | 4066 | goto failed_mount_wq; |
4062 | } | 4067 | } |
4063 | 4068 | ||
4064 | /* | 4069 | /* |
4065 | * The maximum number of concurrent works can be high and | 4070 | * The maximum number of concurrent works can be high and |
4066 | * concurrency isn't really necessary. Limit it to 1. | 4071 | * concurrency isn't really necessary. Limit it to 1. |
4067 | */ | 4072 | */ |
4068 | EXT4_SB(sb)->rsv_conversion_wq = | 4073 | EXT4_SB(sb)->rsv_conversion_wq = |
4069 | alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | 4074 | alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
4070 | if (!EXT4_SB(sb)->rsv_conversion_wq) { | 4075 | if (!EXT4_SB(sb)->rsv_conversion_wq) { |
4071 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); | 4076 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); |
4072 | ret = -ENOMEM; | 4077 | ret = -ENOMEM; |
4073 | goto failed_mount4; | 4078 | goto failed_mount4; |
4074 | } | 4079 | } |
4075 | 4080 | ||
4076 | /* | 4081 | /* |
4077 | * The jbd2_journal_load will have done any necessary log recovery, | 4082 | * The jbd2_journal_load will have done any necessary log recovery, |
4078 | * so we can safely mount the rest of the filesystem now. | 4083 | * so we can safely mount the rest of the filesystem now. |
4079 | */ | 4084 | */ |
4080 | 4085 | ||
4081 | root = ext4_iget(sb, EXT4_ROOT_INO); | 4086 | root = ext4_iget(sb, EXT4_ROOT_INO); |
4082 | if (IS_ERR(root)) { | 4087 | if (IS_ERR(root)) { |
4083 | ext4_msg(sb, KERN_ERR, "get root inode failed"); | 4088 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
4084 | ret = PTR_ERR(root); | 4089 | ret = PTR_ERR(root); |
4085 | root = NULL; | 4090 | root = NULL; |
4086 | goto failed_mount4; | 4091 | goto failed_mount4; |
4087 | } | 4092 | } |
4088 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 4093 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
4089 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); | 4094 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
4090 | iput(root); | 4095 | iput(root); |
4091 | goto failed_mount4; | 4096 | goto failed_mount4; |
4092 | } | 4097 | } |
4093 | sb->s_root = d_make_root(root); | 4098 | sb->s_root = d_make_root(root); |
4094 | if (!sb->s_root) { | 4099 | if (!sb->s_root) { |
4095 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); | 4100 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
4096 | ret = -ENOMEM; | 4101 | ret = -ENOMEM; |
4097 | goto failed_mount4; | 4102 | goto failed_mount4; |
4098 | } | 4103 | } |
4099 | 4104 | ||
4100 | if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) | 4105 | if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) |
4101 | sb->s_flags |= MS_RDONLY; | 4106 | sb->s_flags |= MS_RDONLY; |
4102 | 4107 | ||
4103 | /* determine the minimum size of new large inodes, if present */ | 4108 | /* determine the minimum size of new large inodes, if present */ |
4104 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { | 4109 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { |
4105 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - | 4110 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - |
4106 | EXT4_GOOD_OLD_INODE_SIZE; | 4111 | EXT4_GOOD_OLD_INODE_SIZE; |
4107 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4112 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
4108 | EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { | 4113 | EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { |
4109 | if (sbi->s_want_extra_isize < | 4114 | if (sbi->s_want_extra_isize < |
4110 | le16_to_cpu(es->s_want_extra_isize)) | 4115 | le16_to_cpu(es->s_want_extra_isize)) |
4111 | sbi->s_want_extra_isize = | 4116 | sbi->s_want_extra_isize = |
4112 | le16_to_cpu(es->s_want_extra_isize); | 4117 | le16_to_cpu(es->s_want_extra_isize); |
4113 | if (sbi->s_want_extra_isize < | 4118 | if (sbi->s_want_extra_isize < |
4114 | le16_to_cpu(es->s_min_extra_isize)) | 4119 | le16_to_cpu(es->s_min_extra_isize)) |
4115 | sbi->s_want_extra_isize = | 4120 | sbi->s_want_extra_isize = |
4116 | le16_to_cpu(es->s_min_extra_isize); | 4121 | le16_to_cpu(es->s_min_extra_isize); |
4117 | } | 4122 | } |
4118 | } | 4123 | } |
4119 | /* Check if enough inode space is available */ | 4124 | /* Check if enough inode space is available */ |
4120 | if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > | 4125 | if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > |
4121 | sbi->s_inode_size) { | 4126 | sbi->s_inode_size) { |
4122 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - | 4127 | sbi->s_want_extra_isize = sizeof(struct ext4_inode) - |
4123 | EXT4_GOOD_OLD_INODE_SIZE; | 4128 | EXT4_GOOD_OLD_INODE_SIZE; |
4124 | ext4_msg(sb, KERN_INFO, "required extra inode space not " | 4129 | ext4_msg(sb, KERN_INFO, "required extra inode space not " |
4125 | "available"); | 4130 | "available"); |
4126 | } | 4131 | } |
4127 | 4132 | ||
4128 | err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); | 4133 | err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); |
4129 | if (err) { | 4134 | if (err) { |
4130 | ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " | 4135 | ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " |
4131 | "reserved pool", ext4_calculate_resv_clusters(sb)); | 4136 | "reserved pool", ext4_calculate_resv_clusters(sb)); |
4132 | goto failed_mount4a; | 4137 | goto failed_mount4a; |
4133 | } | 4138 | } |
4134 | 4139 | ||
4135 | err = ext4_setup_system_zone(sb); | 4140 | err = ext4_setup_system_zone(sb); |
4136 | if (err) { | 4141 | if (err) { |
4137 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | 4142 | ext4_msg(sb, KERN_ERR, "failed to initialize system " |
4138 | "zone (%d)", err); | 4143 | "zone (%d)", err); |
4139 | goto failed_mount4a; | 4144 | goto failed_mount4a; |
4140 | } | 4145 | } |
4141 | 4146 | ||
4142 | ext4_ext_init(sb); | 4147 | ext4_ext_init(sb); |
4143 | err = ext4_mb_init(sb); | 4148 | err = ext4_mb_init(sb); |
4144 | if (err) { | 4149 | if (err) { |
4145 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", | 4150 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", |
4146 | err); | 4151 | err); |
4147 | goto failed_mount5; | 4152 | goto failed_mount5; |
4148 | } | 4153 | } |
4149 | 4154 | ||
4150 | block = ext4_count_free_clusters(sb); | 4155 | block = ext4_count_free_clusters(sb); |
4151 | ext4_free_blocks_count_set(sbi->s_es, | 4156 | ext4_free_blocks_count_set(sbi->s_es, |
4152 | EXT4_C2B(sbi, block)); | 4157 | EXT4_C2B(sbi, block)); |
4153 | err = percpu_counter_init(&sbi->s_freeclusters_counter, block, | 4158 | err = percpu_counter_init(&sbi->s_freeclusters_counter, block, |
4154 | GFP_KERNEL); | 4159 | GFP_KERNEL); |
4155 | if (!err) { | 4160 | if (!err) { |
4156 | unsigned long freei = ext4_count_free_inodes(sb); | 4161 | unsigned long freei = ext4_count_free_inodes(sb); |
4157 | sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); | 4162 | sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); |
4158 | err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, | 4163 | err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, |
4159 | GFP_KERNEL); | 4164 | GFP_KERNEL); |
4160 | } | 4165 | } |
4161 | if (!err) | 4166 | if (!err) |
4162 | err = percpu_counter_init(&sbi->s_dirs_counter, | 4167 | err = percpu_counter_init(&sbi->s_dirs_counter, |
4163 | ext4_count_dirs(sb), GFP_KERNEL); | 4168 | ext4_count_dirs(sb), GFP_KERNEL); |
4164 | if (!err) | 4169 | if (!err) |
4165 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, | 4170 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, |
4166 | GFP_KERNEL); | 4171 | GFP_KERNEL); |
4167 | if (err) { | 4172 | if (err) { |
4168 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 4173 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
4169 | goto failed_mount6; | 4174 | goto failed_mount6; |
4170 | } | 4175 | } |
4171 | 4176 | ||
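The percpu-counter block above initializes four counters but tests for failure only once at the end: each subsequent init runs only while err is still zero, so a single error check covers the whole chain. A self-contained sketch of that "if (!err)" idiom, assuming a hypothetical counter_init() that fails by returning -ENOMEM (it is not the kernel's percpu_counter_init()):

#include <errno.h>
#include <stdio.h>

static int counter_init(const char *name, int fail)
{
    if (fail)
        return -ENOMEM;
    printf("%s initialized\n", name);
    return 0;
}

int main(void)
{
    int err;

    err = counter_init("freeclusters", 0);
    if (!err)                                  /* each step runs only if  */
        err = counter_init("freeinodes", 0);   /* all prior ones worked   */
    if (!err)
        err = counter_init("dirs", 1);         /* simulate a failure here */
    if (!err)
        err = counter_init("dirtyclusters", 0);
    if (err) {                                 /* one check for the chain */
        fprintf(stderr, "insufficient memory (err=%d)\n", err);
        return 1;
    }
    return 0;
}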
4172 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 4177 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
4173 | if (!ext4_fill_flex_info(sb)) { | 4178 | if (!ext4_fill_flex_info(sb)) { |
4174 | ext4_msg(sb, KERN_ERR, | 4179 | ext4_msg(sb, KERN_ERR, |
4175 | "unable to initialize " | 4180 | "unable to initialize " |
4176 | "flex_bg meta info!"); | 4181 | "flex_bg meta info!"); |
4177 | goto failed_mount6; | 4182 | goto failed_mount6; |
4178 | } | 4183 | } |
4179 | 4184 | ||
4180 | err = ext4_register_li_request(sb, first_not_zeroed); | 4185 | err = ext4_register_li_request(sb, first_not_zeroed); |
4181 | if (err) | 4186 | if (err) |
4182 | goto failed_mount6; | 4187 | goto failed_mount6; |
4183 | 4188 | ||
4184 | sbi->s_kobj.kset = ext4_kset; | 4189 | sbi->s_kobj.kset = ext4_kset; |
4185 | init_completion(&sbi->s_kobj_unregister); | 4190 | init_completion(&sbi->s_kobj_unregister); |
4186 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, | 4191 | err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, |
4187 | "%s", sb->s_id); | 4192 | "%s", sb->s_id); |
4188 | if (err) | 4193 | if (err) |
4189 | goto failed_mount7; | 4194 | goto failed_mount7; |
4190 | 4195 | ||
4191 | #ifdef CONFIG_QUOTA | 4196 | #ifdef CONFIG_QUOTA |
4192 | /* Enable quota usage during mount. */ | 4197 | /* Enable quota usage during mount. */ |
4193 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && | 4198 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && |
4194 | !(sb->s_flags & MS_RDONLY)) { | 4199 | !(sb->s_flags & MS_RDONLY)) { |
4195 | err = ext4_enable_quotas(sb); | 4200 | err = ext4_enable_quotas(sb); |
4196 | if (err) | 4201 | if (err) |
4197 | goto failed_mount8; | 4202 | goto failed_mount8; |
4198 | } | 4203 | } |
4199 | #endif /* CONFIG_QUOTA */ | 4204 | #endif /* CONFIG_QUOTA */ |
4200 | 4205 | ||
4201 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; | 4206 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; |
4202 | ext4_orphan_cleanup(sb, es); | 4207 | ext4_orphan_cleanup(sb, es); |
4203 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; | 4208 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; |
4204 | if (needs_recovery) { | 4209 | if (needs_recovery) { |
4205 | ext4_msg(sb, KERN_INFO, "recovery complete"); | 4210 | ext4_msg(sb, KERN_INFO, "recovery complete"); |
4206 | ext4_mark_recovery_complete(sb, es); | 4211 | ext4_mark_recovery_complete(sb, es); |
4207 | } | 4212 | } |
4208 | if (EXT4_SB(sb)->s_journal) { | 4213 | if (EXT4_SB(sb)->s_journal) { |
4209 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 4214 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
4210 | descr = " journalled data mode"; | 4215 | descr = " journalled data mode"; |
4211 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | 4216 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) |
4212 | descr = " ordered data mode"; | 4217 | descr = " ordered data mode"; |
4213 | else | 4218 | else |
4214 | descr = " writeback data mode"; | 4219 | descr = " writeback data mode"; |
4215 | } else | 4220 | } else |
4216 | descr = "out journal"; | 4221 | descr = "out journal"; |
4217 | 4222 | ||
4218 | if (test_opt(sb, DISCARD)) { | 4223 | if (test_opt(sb, DISCARD)) { |
4219 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 4224 | struct request_queue *q = bdev_get_queue(sb->s_bdev); |
4220 | if (!blk_queue_discard(q)) | 4225 | if (!blk_queue_discard(q)) |
4221 | ext4_msg(sb, KERN_WARNING, | 4226 | ext4_msg(sb, KERN_WARNING, |
4222 | "mounting with \"discard\" option, but " | 4227 | "mounting with \"discard\" option, but " |
4223 | "the device does not support discard"); | 4228 | "the device does not support discard"); |
4224 | } | 4229 | } |
4225 | 4230 | ||
4226 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " | 4231 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " |
4227 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, | 4232 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, |
4228 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); | 4233 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); |
4229 | 4234 | ||
4230 | if (es->s_error_count) | 4235 | if (es->s_error_count) |
4231 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ | 4236 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ |
4232 | 4237 | ||
4233 | /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ | 4238 | /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ |
4234 | ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); | 4239 | ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); |
4235 | ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); | 4240 | ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); |
4236 | ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); | 4241 | ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); |
4237 | 4242 | ||
4238 | kfree(orig_data); | 4243 | kfree(orig_data); |
4239 | return 0; | 4244 | return 0; |
4240 | 4245 | ||
4241 | cantfind_ext4: | 4246 | cantfind_ext4: |
4242 | if (!silent) | 4247 | if (!silent) |
4243 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); | 4248 | ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); |
4244 | goto failed_mount; | 4249 | goto failed_mount; |
4245 | 4250 | ||
4246 | #ifdef CONFIG_QUOTA | 4251 | #ifdef CONFIG_QUOTA |
4247 | failed_mount8: | 4252 | failed_mount8: |
4248 | kobject_del(&sbi->s_kobj); | 4253 | kobject_del(&sbi->s_kobj); |
4249 | #endif | 4254 | #endif |
4250 | failed_mount7: | 4255 | failed_mount7: |
4251 | ext4_unregister_li_request(sb); | 4256 | ext4_unregister_li_request(sb); |
4252 | failed_mount6: | 4257 | failed_mount6: |
4253 | ext4_mb_release(sb); | 4258 | ext4_mb_release(sb); |
4254 | if (sbi->s_flex_groups) | 4259 | if (sbi->s_flex_groups) |
4255 | kvfree(sbi->s_flex_groups); | 4260 | kvfree(sbi->s_flex_groups); |
4256 | percpu_counter_destroy(&sbi->s_freeclusters_counter); | 4261 | percpu_counter_destroy(&sbi->s_freeclusters_counter); |
4257 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 4262 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
4258 | percpu_counter_destroy(&sbi->s_dirs_counter); | 4263 | percpu_counter_destroy(&sbi->s_dirs_counter); |
4259 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); | 4264 | percpu_counter_destroy(&sbi->s_dirtyclusters_counter); |
4260 | failed_mount5: | 4265 | failed_mount5: |
4261 | ext4_ext_release(sb); | 4266 | ext4_ext_release(sb); |
4262 | ext4_release_system_zone(sb); | 4267 | ext4_release_system_zone(sb); |
4263 | failed_mount4a: | 4268 | failed_mount4a: |
4264 | dput(sb->s_root); | 4269 | dput(sb->s_root); |
4265 | sb->s_root = NULL; | 4270 | sb->s_root = NULL; |
4266 | failed_mount4: | 4271 | failed_mount4: |
4267 | ext4_msg(sb, KERN_ERR, "mount failed"); | 4272 | ext4_msg(sb, KERN_ERR, "mount failed"); |
4268 | if (EXT4_SB(sb)->rsv_conversion_wq) | 4273 | if (EXT4_SB(sb)->rsv_conversion_wq) |
4269 | destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | 4274 | destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); |
4270 | failed_mount_wq: | 4275 | failed_mount_wq: |
4271 | if (sbi->s_journal) { | 4276 | if (sbi->s_journal) { |
4272 | jbd2_journal_destroy(sbi->s_journal); | 4277 | jbd2_journal_destroy(sbi->s_journal); |
4273 | sbi->s_journal = NULL; | 4278 | sbi->s_journal = NULL; |
4274 | } | 4279 | } |
4275 | failed_mount3a: | 4280 | failed_mount3a: |
4276 | ext4_es_unregister_shrinker(sbi); | 4281 | ext4_es_unregister_shrinker(sbi); |
4277 | failed_mount3: | 4282 | failed_mount3: |
4278 | del_timer_sync(&sbi->s_err_report); | 4283 | del_timer_sync(&sbi->s_err_report); |
4279 | if (sbi->s_mmp_tsk) | 4284 | if (sbi->s_mmp_tsk) |
4280 | kthread_stop(sbi->s_mmp_tsk); | 4285 | kthread_stop(sbi->s_mmp_tsk); |
4281 | failed_mount2: | 4286 | failed_mount2: |
4282 | for (i = 0; i < db_count; i++) | 4287 | for (i = 0; i < db_count; i++) |
4283 | brelse(sbi->s_group_desc[i]); | 4288 | brelse(sbi->s_group_desc[i]); |
4284 | kvfree(sbi->s_group_desc); | 4289 | kvfree(sbi->s_group_desc); |
4285 | failed_mount: | 4290 | failed_mount: |
4286 | if (sbi->s_chksum_driver) | 4291 | if (sbi->s_chksum_driver) |
4287 | crypto_free_shash(sbi->s_chksum_driver); | 4292 | crypto_free_shash(sbi->s_chksum_driver); |
4288 | if (sbi->s_proc) { | 4293 | if (sbi->s_proc) { |
4289 | remove_proc_entry("options", sbi->s_proc); | 4294 | remove_proc_entry("options", sbi->s_proc); |
4290 | remove_proc_entry(sb->s_id, ext4_proc_root); | 4295 | remove_proc_entry(sb->s_id, ext4_proc_root); |
4291 | } | 4296 | } |
4292 | #ifdef CONFIG_QUOTA | 4297 | #ifdef CONFIG_QUOTA |
4293 | for (i = 0; i < EXT4_MAXQUOTAS; i++) | 4298 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
4294 | kfree(sbi->s_qf_names[i]); | 4299 | kfree(sbi->s_qf_names[i]); |
4295 | #endif | 4300 | #endif |
4296 | ext4_blkdev_remove(sbi); | 4301 | ext4_blkdev_remove(sbi); |
4297 | brelse(bh); | 4302 | brelse(bh); |
4298 | out_fail: | 4303 | out_fail: |
4299 | sb->s_fs_info = NULL; | 4304 | sb->s_fs_info = NULL; |
4300 | kfree(sbi->s_blockgroup_lock); | 4305 | kfree(sbi->s_blockgroup_lock); |
4301 | kfree(sbi); | 4306 | kfree(sbi); |
4302 | out_free_orig: | 4307 | out_free_orig: |
4303 | kfree(orig_data); | 4308 | kfree(orig_data); |
4304 | return err ? err : ret; | 4309 | return err ? err : ret; |
4305 | } | 4310 | } |
4306 | 4311 | ||
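The long failed_mount* tail of ext4_fill_super() above is the kernel's standard goto-unwind pattern: each label releases exactly the resources acquired before the corresponding failure point, in reverse order of acquisition. A standalone sketch of the pattern; the resources here are plain malloc()s standing in for the journal, workqueue, and kobject state the real function tears down.

#include <stdio.h>
#include <stdlib.h>

static int mount_like(void)
{
    char *a = NULL, *b = NULL, *c = NULL;
    int ret = -1;

    a = malloc(16);
    if (!a)
        goto out;
    b = malloc(16);
    if (!b)
        goto free_a;        /* unwind only what was acquired so far */
    c = malloc(16);
    if (!c)
        goto free_b;

    printf("all resources acquired\n");
    free(c);
    free(b);
    free(a);
    return 0;

free_b:                     /* labels release in reverse acquisition order */
    free(b);
free_a:
    free(a);
out:
    return ret;
}

int main(void) { return mount_like() ? 1 : 0; }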
4307 | /* | 4312 | /* |
4308 | * Setup any per-fs journal parameters now. We'll do this both on | 4313 | * Setup any per-fs journal parameters now. We'll do this both on |
4309 | * initial mount, once the journal has been initialised but before we've | 4314 | * initial mount, once the journal has been initialised but before we've |
4310 | * done any recovery; and again on any subsequent remount. | 4315 | * done any recovery; and again on any subsequent remount. |
4311 | */ | 4316 | */ |
4312 | static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | 4317 | static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) |
4313 | { | 4318 | { |
4314 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4319 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4315 | 4320 | ||
4316 | journal->j_commit_interval = sbi->s_commit_interval; | 4321 | journal->j_commit_interval = sbi->s_commit_interval; |
4317 | journal->j_min_batch_time = sbi->s_min_batch_time; | 4322 | journal->j_min_batch_time = sbi->s_min_batch_time; |
4318 | journal->j_max_batch_time = sbi->s_max_batch_time; | 4323 | journal->j_max_batch_time = sbi->s_max_batch_time; |
4319 | 4324 | ||
4320 | write_lock(&journal->j_state_lock); | 4325 | write_lock(&journal->j_state_lock); |
4321 | if (test_opt(sb, BARRIER)) | 4326 | if (test_opt(sb, BARRIER)) |
4322 | journal->j_flags |= JBD2_BARRIER; | 4327 | journal->j_flags |= JBD2_BARRIER; |
4323 | else | 4328 | else |
4324 | journal->j_flags &= ~JBD2_BARRIER; | 4329 | journal->j_flags &= ~JBD2_BARRIER; |
4325 | if (test_opt(sb, DATA_ERR_ABORT)) | 4330 | if (test_opt(sb, DATA_ERR_ABORT)) |
4326 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | 4331 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; |
4327 | else | 4332 | else |
4328 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | 4333 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; |
4329 | write_unlock(&journal->j_state_lock); | 4334 | write_unlock(&journal->j_state_lock); |
4330 | } | 4335 | } |
4331 | 4336 | ||
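Two details of ext4_init_journal_params() above are worth spelling out: the flag bits are toggled under the journal's state write lock so readers never observe a half-updated flags word, and the else branches actively clear each bit because the same function runs again on remount, when an option may have been dropped. A userspace sketch, assuming a flags word and a pthread rwlock as stand-ins for journal->j_flags and j_state_lock:

#include <pthread.h>
#include <stdio.h>

#define JF_BARRIER               0x1
#define JF_ABORT_ON_SYNCDATA_ERR 0x2

static unsigned long j_flags;
static pthread_rwlock_t j_state_lock = PTHREAD_RWLOCK_INITIALIZER;

static void init_params(int barrier, int data_err_abort)
{
    pthread_rwlock_wrlock(&j_state_lock);
    if (barrier)
        j_flags |= JF_BARRIER;
    else
        j_flags &= ~JF_BARRIER;   /* clear, don't just skip setting */
    if (data_err_abort)
        j_flags |= JF_ABORT_ON_SYNCDATA_ERR;
    else
        j_flags &= ~JF_ABORT_ON_SYNCDATA_ERR;
    pthread_rwlock_unlock(&j_state_lock);
}

int main(void)
{
    init_params(1, 0);
    printf("flags = %#lx\n", j_flags);  /* 0x1: barrier on, abort off */
    init_params(0, 1);                  /* "remount" with options swapped */
    printf("flags = %#lx\n", j_flags);  /* 0x2: barrier cleared */
    return 0;
}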
4332 | static journal_t *ext4_get_journal(struct super_block *sb, | 4337 | static journal_t *ext4_get_journal(struct super_block *sb, |
4333 | unsigned int journal_inum) | 4338 | unsigned int journal_inum) |
4334 | { | 4339 | { |
4335 | struct inode *journal_inode; | 4340 | struct inode *journal_inode; |
4336 | journal_t *journal; | 4341 | journal_t *journal; |
4337 | 4342 | ||
4338 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); | 4343 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); |
4339 | 4344 | ||
4340 | /* First, test for the existence of a valid inode on disk. Bad | 4345 | /* First, test for the existence of a valid inode on disk. Bad |
4341 | * things happen if we iget() an unused inode, as the subsequent | 4346 | * things happen if we iget() an unused inode, as the subsequent |
4342 | * iput() will try to delete it. */ | 4347 | * iput() will try to delete it. */ |
4343 | 4348 | ||
4344 | journal_inode = ext4_iget(sb, journal_inum); | 4349 | journal_inode = ext4_iget(sb, journal_inum); |
4345 | if (IS_ERR(journal_inode)) { | 4350 | if (IS_ERR(journal_inode)) { |
4346 | ext4_msg(sb, KERN_ERR, "no journal found"); | 4351 | ext4_msg(sb, KERN_ERR, "no journal found"); |
4347 | return NULL; | 4352 | return NULL; |
4348 | } | 4353 | } |
4349 | if (!journal_inode->i_nlink) { | 4354 | if (!journal_inode->i_nlink) { |
4350 | make_bad_inode(journal_inode); | 4355 | make_bad_inode(journal_inode); |
4351 | iput(journal_inode); | 4356 | iput(journal_inode); |
4352 | ext4_msg(sb, KERN_ERR, "journal inode is deleted"); | 4357 | ext4_msg(sb, KERN_ERR, "journal inode is deleted"); |
4353 | return NULL; | 4358 | return NULL; |
4354 | } | 4359 | } |
4355 | 4360 | ||
4356 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", | 4361 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
4357 | journal_inode, journal_inode->i_size); | 4362 | journal_inode, journal_inode->i_size); |
4358 | if (!S_ISREG(journal_inode->i_mode)) { | 4363 | if (!S_ISREG(journal_inode->i_mode)) { |
4359 | ext4_msg(sb, KERN_ERR, "invalid journal inode"); | 4364 | ext4_msg(sb, KERN_ERR, "invalid journal inode"); |
4360 | iput(journal_inode); | 4365 | iput(journal_inode); |
4361 | return NULL; | 4366 | return NULL; |
4362 | } | 4367 | } |
4363 | 4368 | ||
4364 | journal = jbd2_journal_init_inode(journal_inode); | 4369 | journal = jbd2_journal_init_inode(journal_inode); |
4365 | if (!journal) { | 4370 | if (!journal) { |
4366 | ext4_msg(sb, KERN_ERR, "Could not load journal inode"); | 4371 | ext4_msg(sb, KERN_ERR, "Could not load journal inode"); |
4367 | iput(journal_inode); | 4372 | iput(journal_inode); |
4368 | return NULL; | 4373 | return NULL; |
4369 | } | 4374 | } |
4370 | journal->j_private = sb; | 4375 | journal->j_private = sb; |
4371 | ext4_init_journal_params(sb, journal); | 4376 | ext4_init_journal_params(sb, journal); |
4372 | return journal; | 4377 | return journal; |
4373 | } | 4378 | } |
4374 | 4379 | ||
4375 | static journal_t *ext4_get_dev_journal(struct super_block *sb, | 4380 | static journal_t *ext4_get_dev_journal(struct super_block *sb, |
4376 | dev_t j_dev) | 4381 | dev_t j_dev) |
4377 | { | 4382 | { |
4378 | struct buffer_head *bh; | 4383 | struct buffer_head *bh; |
4379 | journal_t *journal; | 4384 | journal_t *journal; |
4380 | ext4_fsblk_t start; | 4385 | ext4_fsblk_t start; |
4381 | ext4_fsblk_t len; | 4386 | ext4_fsblk_t len; |
4382 | int hblock, blocksize; | 4387 | int hblock, blocksize; |
4383 | ext4_fsblk_t sb_block; | 4388 | ext4_fsblk_t sb_block; |
4384 | unsigned long offset; | 4389 | unsigned long offset; |
4385 | struct ext4_super_block *es; | 4390 | struct ext4_super_block *es; |
4386 | struct block_device *bdev; | 4391 | struct block_device *bdev; |
4387 | 4392 | ||
4388 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); | 4393 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); |
4389 | 4394 | ||
4390 | bdev = ext4_blkdev_get(j_dev, sb); | 4395 | bdev = ext4_blkdev_get(j_dev, sb); |
4391 | if (bdev == NULL) | 4396 | if (bdev == NULL) |
4392 | return NULL; | 4397 | return NULL; |
4393 | 4398 | ||
4394 | blocksize = sb->s_blocksize; | 4399 | blocksize = sb->s_blocksize; |
4395 | hblock = bdev_logical_block_size(bdev); | 4400 | hblock = bdev_logical_block_size(bdev); |
4396 | if (blocksize < hblock) { | 4401 | if (blocksize < hblock) { |
4397 | ext4_msg(sb, KERN_ERR, | 4402 | ext4_msg(sb, KERN_ERR, |
4398 | "blocksize too small for journal device"); | 4403 | "blocksize too small for journal device"); |
4399 | goto out_bdev; | 4404 | goto out_bdev; |
4400 | } | 4405 | } |
4401 | 4406 | ||
4402 | sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; | 4407 | sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; |
4403 | offset = EXT4_MIN_BLOCK_SIZE % blocksize; | 4408 | offset = EXT4_MIN_BLOCK_SIZE % blocksize; |
4404 | set_blocksize(bdev, blocksize); | 4409 | set_blocksize(bdev, blocksize); |
4405 | if (!(bh = __bread(bdev, sb_block, blocksize))) { | 4410 | if (!(bh = __bread(bdev, sb_block, blocksize))) { |
4406 | ext4_msg(sb, KERN_ERR, "couldn't read superblock of " | 4411 | ext4_msg(sb, KERN_ERR, "couldn't read superblock of " |
4407 | "external journal"); | 4412 | "external journal"); |
4408 | goto out_bdev; | 4413 | goto out_bdev; |
4409 | } | 4414 | } |
4410 | 4415 | ||
4411 | es = (struct ext4_super_block *) (bh->b_data + offset); | 4416 | es = (struct ext4_super_block *) (bh->b_data + offset); |
4412 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || | 4417 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || |
4413 | !(le32_to_cpu(es->s_feature_incompat) & | 4418 | !(le32_to_cpu(es->s_feature_incompat) & |
4414 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { | 4419 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { |
4415 | ext4_msg(sb, KERN_ERR, "external journal has " | 4420 | ext4_msg(sb, KERN_ERR, "external journal has " |
4416 | "bad superblock"); | 4421 | "bad superblock"); |
4417 | brelse(bh); | 4422 | brelse(bh); |
4418 | goto out_bdev; | 4423 | goto out_bdev; |
4419 | } | 4424 | } |
4420 | 4425 | ||
4421 | if ((le32_to_cpu(es->s_feature_ro_compat) & | 4426 | if ((le32_to_cpu(es->s_feature_ro_compat) & |
4422 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | 4427 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && |
4423 | es->s_checksum != ext4_superblock_csum(sb, es)) { | 4428 | es->s_checksum != ext4_superblock_csum(sb, es)) { |
4424 | ext4_msg(sb, KERN_ERR, "external journal has " | 4429 | ext4_msg(sb, KERN_ERR, "external journal has " |
4425 | "corrupt superblock"); | 4430 | "corrupt superblock"); |
4426 | brelse(bh); | 4431 | brelse(bh); |
4427 | goto out_bdev; | 4432 | goto out_bdev; |
4428 | } | 4433 | } |
4429 | 4434 | ||
4430 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { | 4435 | if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { |
4431 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); | 4436 | ext4_msg(sb, KERN_ERR, "journal UUID does not match"); |
4432 | brelse(bh); | 4437 | brelse(bh); |
4433 | goto out_bdev; | 4438 | goto out_bdev; |
4434 | } | 4439 | } |
4435 | 4440 | ||
4436 | len = ext4_blocks_count(es); | 4441 | len = ext4_blocks_count(es); |
4437 | start = sb_block + 1; | 4442 | start = sb_block + 1; |
4438 | brelse(bh); /* we're done with the superblock */ | 4443 | brelse(bh); /* we're done with the superblock */ |
4439 | 4444 | ||
4440 | journal = jbd2_journal_init_dev(bdev, sb->s_bdev, | 4445 | journal = jbd2_journal_init_dev(bdev, sb->s_bdev, |
4441 | start, len, blocksize); | 4446 | start, len, blocksize); |
4442 | if (!journal) { | 4447 | if (!journal) { |
4443 | ext4_msg(sb, KERN_ERR, "failed to create device journal"); | 4448 | ext4_msg(sb, KERN_ERR, "failed to create device journal"); |
4444 | goto out_bdev; | 4449 | goto out_bdev; |
4445 | } | 4450 | } |
4446 | journal->j_private = sb; | 4451 | journal->j_private = sb; |
4447 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); | 4452 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer); |
4448 | wait_on_buffer(journal->j_sb_buffer); | 4453 | wait_on_buffer(journal->j_sb_buffer); |
4449 | if (!buffer_uptodate(journal->j_sb_buffer)) { | 4454 | if (!buffer_uptodate(journal->j_sb_buffer)) { |
4450 | ext4_msg(sb, KERN_ERR, "I/O error on journal device"); | 4455 | ext4_msg(sb, KERN_ERR, "I/O error on journal device"); |
4451 | goto out_journal; | 4456 | goto out_journal; |
4452 | } | 4457 | } |
4453 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { | 4458 | if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { |
4454 | ext4_msg(sb, KERN_ERR, "External journal has more than one " | 4459 | ext4_msg(sb, KERN_ERR, "External journal has more than one " |
4455 | "user (unsupported) - %d", | 4460 | "user (unsupported) - %d", |
4456 | be32_to_cpu(journal->j_superblock->s_nr_users)); | 4461 | be32_to_cpu(journal->j_superblock->s_nr_users)); |
4457 | goto out_journal; | 4462 | goto out_journal; |
4458 | } | 4463 | } |
4459 | EXT4_SB(sb)->journal_bdev = bdev; | 4464 | EXT4_SB(sb)->journal_bdev = bdev; |
4460 | ext4_init_journal_params(sb, journal); | 4465 | ext4_init_journal_params(sb, journal); |
4461 | return journal; | 4466 | return journal; |
4462 | 4467 | ||
4463 | out_journal: | 4468 | out_journal: |
4464 | jbd2_journal_destroy(journal); | 4469 | jbd2_journal_destroy(journal); |
4465 | out_bdev: | 4470 | out_bdev: |
4466 | ext4_blkdev_put(bdev); | 4471 | ext4_blkdev_put(bdev); |
4467 | return NULL; | 4472 | return NULL; |
4468 | } | 4473 | } |
4469 | 4474 | ||
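In ext4_get_dev_journal() above, the external journal's ext4 superblock lives at byte offset 1024 (EXT4_MIN_BLOCK_SIZE), so the code splits that fixed byte offset into a block number for __bread() and an offset within the block. A worked example of the arithmetic, plain C with no kernel dependencies:

#include <stdio.h>

#define EXT4_MIN_BLOCK_SIZE 1024

int main(void)
{
    int sizes[] = { 1024, 2048, 4096 };

    for (int i = 0; i < 3; i++) {
        int bs = sizes[i];
        unsigned long sb_block = EXT4_MIN_BLOCK_SIZE / bs;
        unsigned long offset  = EXT4_MIN_BLOCK_SIZE % bs;
        /* 1024: block 1, offset 0;  2048: block 0, offset 1024;
         * 4096: block 0, offset 1024 */
        printf("blocksize %4d -> sb_block %lu, offset %lu\n",
               bs, sb_block, offset);
    }
    return 0;
}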
4470 | static int ext4_load_journal(struct super_block *sb, | 4475 | static int ext4_load_journal(struct super_block *sb, |
4471 | struct ext4_super_block *es, | 4476 | struct ext4_super_block *es, |
4472 | unsigned long journal_devnum) | 4477 | unsigned long journal_devnum) |
4473 | { | 4478 | { |
4474 | journal_t *journal; | 4479 | journal_t *journal; |
4475 | unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); | 4480 | unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); |
4476 | dev_t journal_dev; | 4481 | dev_t journal_dev; |
4477 | int err = 0; | 4482 | int err = 0; |
4478 | int really_read_only; | 4483 | int really_read_only; |
4479 | 4484 | ||
4480 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); | 4485 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); |
4481 | 4486 | ||
4482 | if (journal_devnum && | 4487 | if (journal_devnum && |
4483 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 4488 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
4484 | ext4_msg(sb, KERN_INFO, "external journal device major/minor " | 4489 | ext4_msg(sb, KERN_INFO, "external journal device major/minor " |
4485 | "numbers have changed"); | 4490 | "numbers have changed"); |
4486 | journal_dev = new_decode_dev(journal_devnum); | 4491 | journal_dev = new_decode_dev(journal_devnum); |
4487 | } else | 4492 | } else |
4488 | journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); | 4493 | journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); |
4489 | 4494 | ||
4490 | really_read_only = bdev_read_only(sb->s_bdev); | 4495 | really_read_only = bdev_read_only(sb->s_bdev); |
4491 | 4496 | ||
4492 | /* | 4497 | /* |
4493 | * Are we loading a blank journal or performing recovery after a | 4498 | * Are we loading a blank journal or performing recovery after a |
4494 | * crash? For recovery, we need to check in advance whether we | 4499 | * crash? For recovery, we need to check in advance whether we |
4495 | * can get read-write access to the device. | 4500 | * can get read-write access to the device. |
4496 | */ | 4501 | */ |
4497 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { | 4502 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { |
4498 | if (sb->s_flags & MS_RDONLY) { | 4503 | if (sb->s_flags & MS_RDONLY) { |
4499 | ext4_msg(sb, KERN_INFO, "recovery " | 4504 | ext4_msg(sb, KERN_INFO, "recovery " |
4500 | "required on readonly filesystem"); | 4505 | "required on readonly filesystem"); |
4501 | if (really_read_only) { | 4506 | if (really_read_only) { |
4502 | ext4_msg(sb, KERN_ERR, "write access " | 4507 | ext4_msg(sb, KERN_ERR, "write access " |
4503 | "unavailable, cannot proceed"); | 4508 | "unavailable, cannot proceed"); |
4504 | return -EROFS; | 4509 | return -EROFS; |
4505 | } | 4510 | } |
4506 | ext4_msg(sb, KERN_INFO, "write access will " | 4511 | ext4_msg(sb, KERN_INFO, "write access will " |
4507 | "be enabled during recovery"); | 4512 | "be enabled during recovery"); |
4508 | } | 4513 | } |
4509 | } | 4514 | } |
4510 | 4515 | ||
4511 | if (journal_inum && journal_dev) { | 4516 | if (journal_inum && journal_dev) { |
4512 | ext4_msg(sb, KERN_ERR, "filesystem has both journal " | 4517 | ext4_msg(sb, KERN_ERR, "filesystem has both journal " |
4513 | "and inode journals!"); | 4518 | "and inode journals!"); |
4514 | return -EINVAL; | 4519 | return -EINVAL; |
4515 | } | 4520 | } |
4516 | 4521 | ||
4517 | if (journal_inum) { | 4522 | if (journal_inum) { |
4518 | if (!(journal = ext4_get_journal(sb, journal_inum))) | 4523 | if (!(journal = ext4_get_journal(sb, journal_inum))) |
4519 | return -EINVAL; | 4524 | return -EINVAL; |
4520 | } else { | 4525 | } else { |
4521 | if (!(journal = ext4_get_dev_journal(sb, journal_dev))) | 4526 | if (!(journal = ext4_get_dev_journal(sb, journal_dev))) |
4522 | return -EINVAL; | 4527 | return -EINVAL; |
4523 | } | 4528 | } |
4524 | 4529 | ||
4525 | if (!(journal->j_flags & JBD2_BARRIER)) | 4530 | if (!(journal->j_flags & JBD2_BARRIER)) |
4526 | ext4_msg(sb, KERN_INFO, "barriers disabled"); | 4531 | ext4_msg(sb, KERN_INFO, "barriers disabled"); |
4527 | 4532 | ||
4528 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) | 4533 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) |
4529 | err = jbd2_journal_wipe(journal, !really_read_only); | 4534 | err = jbd2_journal_wipe(journal, !really_read_only); |
4530 | if (!err) { | 4535 | if (!err) { |
4531 | char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); | 4536 | char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); |
4532 | if (save) | 4537 | if (save) |
4533 | memcpy(save, ((char *) es) + | 4538 | memcpy(save, ((char *) es) + |
4534 | EXT4_S_ERR_START, EXT4_S_ERR_LEN); | 4539 | EXT4_S_ERR_START, EXT4_S_ERR_LEN); |
4535 | err = jbd2_journal_load(journal); | 4540 | err = jbd2_journal_load(journal); |
4536 | if (save) | 4541 | if (save) |
4537 | memcpy(((char *) es) + EXT4_S_ERR_START, | 4542 | memcpy(((char *) es) + EXT4_S_ERR_START, |
4538 | save, EXT4_S_ERR_LEN); | 4543 | save, EXT4_S_ERR_LEN); |
4539 | kfree(save); | 4544 | kfree(save); |
4540 | } | 4545 | } |
4541 | 4546 | ||
4542 | if (err) { | 4547 | if (err) { |
4543 | ext4_msg(sb, KERN_ERR, "error loading journal"); | 4548 | ext4_msg(sb, KERN_ERR, "error loading journal"); |
4544 | jbd2_journal_destroy(journal); | 4549 | jbd2_journal_destroy(journal); |
4545 | return err; | 4550 | return err; |
4546 | } | 4551 | } |
4547 | 4552 | ||
4548 | EXT4_SB(sb)->s_journal = journal; | 4553 | EXT4_SB(sb)->s_journal = journal; |
4549 | ext4_clear_journal_err(sb, es); | 4554 | ext4_clear_journal_err(sb, es); |
4550 | 4555 | ||
4551 | if (!really_read_only && journal_devnum && | 4556 | if (!really_read_only && journal_devnum && |
4552 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { | 4557 | journal_devnum != le32_to_cpu(es->s_journal_dev)) { |
4553 | es->s_journal_dev = cpu_to_le32(journal_devnum); | 4558 | es->s_journal_dev = cpu_to_le32(journal_devnum); |
4554 | 4559 | ||
4555 | /* Make sure we flush the recovery flag to disk. */ | 4560 | /* Make sure we flush the recovery flag to disk. */ |
4556 | ext4_commit_super(sb, 1); | 4561 | ext4_commit_super(sb, 1); |
4557 | } | 4562 | } |
4558 | 4563 | ||
4559 | return 0; | 4564 | return 0; |
4560 | } | 4565 | } |
4561 | 4566 | ||
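The kmalloc/memcpy dance around jbd2_journal_load() above exists because journal replay can rewrite the on-disk superblock, including the error-tracking region between EXT4_S_ERR_START and EXT4_S_ERR_START + EXT4_S_ERR_LEN; the code saves those bytes first and copies them back afterwards so previously recorded errors survive recovery. A self-contained sketch, where replay() is a hypothetical function that clobbers the buffer, standing in for journal replay:

#include <stdio.h>
#include <string.h>

#define ERR_START 8
#define ERR_LEN   8

static void replay(char *sb, size_t len)
{
    memset(sb, 0, len);           /* replay rewrites the whole block */
}

int main(void)
{
    char sb[32];
    char save[ERR_LEN];

    memset(sb, 'x', sizeof(sb));
    memcpy(sb + ERR_START, "ERRDATA", ERR_LEN);  /* error-tracking region */

    memcpy(save, sb + ERR_START, ERR_LEN);       /* save before replay */
    replay(sb, sizeof(sb));
    memcpy(sb + ERR_START, save, ERR_LEN);       /* restore afterwards */

    printf("error region survived: %.7s\n", sb + ERR_START);
    return 0;
}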
4562 | static int ext4_commit_super(struct super_block *sb, int sync) | 4567 | static int ext4_commit_super(struct super_block *sb, int sync) |
4563 | { | 4568 | { |
4564 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 4569 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
4565 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; | 4570 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; |
4566 | int error = 0; | 4571 | int error = 0; |
4567 | 4572 | ||
4568 | if (!sbh || block_device_ejected(sb)) | 4573 | if (!sbh || block_device_ejected(sb)) |
4569 | return error; | 4574 | return error; |
4570 | if (buffer_write_io_error(sbh)) { | 4575 | if (buffer_write_io_error(sbh)) { |
4571 | /* | 4576 | /* |
4572 | * Oh, dear. A previous attempt to write the | 4577 | * Oh, dear. A previous attempt to write the |
4573 | * superblock failed. This could happen because the | 4578 | * superblock failed. This could happen because the |
4574 | * USB device was yanked out. Or it could happen to | 4579 | * USB device was yanked out. Or it could happen to |
4575 | * be a transient write error and maybe the block will | 4580 | * be a transient write error and maybe the block will |
4576 | * be remapped. Nothing we can do but retry the | 4581 | * be remapped. Nothing we can do but retry the |
4577 | * write and hope for the best. | 4582 | * write and hope for the best. |
4578 | */ | 4583 | */ |
4579 | ext4_msg(sb, KERN_ERR, "previous I/O error to " | 4584 | ext4_msg(sb, KERN_ERR, "previous I/O error to " |
4580 | "superblock detected"); | 4585 | "superblock detected"); |
4581 | clear_buffer_write_io_error(sbh); | 4586 | clear_buffer_write_io_error(sbh); |
4582 | set_buffer_uptodate(sbh); | 4587 | set_buffer_uptodate(sbh); |
4583 | } | 4588 | } |
4584 | /* | 4589 | /* |
4585 | * If the file system is mounted read-only, don't update the | 4590 | * If the file system is mounted read-only, don't update the |
4586 | * superblock write time. This avoids updating the superblock | 4591 | * superblock write time. This avoids updating the superblock |
4587 | * write time when we are mounting the root file system | 4592 | * write time when we are mounting the root file system |
4588 | * read/only but we need to replay the journal; at that point, | 4593 | * read/only but we need to replay the journal; at that point, |
4589 | * for people who are east of GMT and who make their clock | 4594 | * for people who are east of GMT and who make their clock |
4590 | * tick in localtime for Windows bug-for-bug compatibility, | 4595 | * tick in localtime for Windows bug-for-bug compatibility, |
4591 | * the clock is set in the future, and this will cause e2fsck | 4596 | * the clock is set in the future, and this will cause e2fsck |
4592 | * to complain and force a full file system check. | 4597 | * to complain and force a full file system check. |
4593 | */ | 4598 | */ |
4594 | if (!(sb->s_flags & MS_RDONLY)) | 4599 | if (!(sb->s_flags & MS_RDONLY)) |
4595 | es->s_wtime = cpu_to_le32(get_seconds()); | 4600 | es->s_wtime = cpu_to_le32(get_seconds()); |
4596 | if (sb->s_bdev->bd_part) | 4601 | if (sb->s_bdev->bd_part) |
4597 | es->s_kbytes_written = | 4602 | es->s_kbytes_written = |
4598 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 4603 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
4599 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 4604 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
4600 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 4605 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); |
4601 | else | 4606 | else |
4602 | es->s_kbytes_written = | 4607 | es->s_kbytes_written = |
4603 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 4608 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
4604 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) | 4609 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) |
4605 | ext4_free_blocks_count_set(es, | 4610 | ext4_free_blocks_count_set(es, |
4606 | EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( | 4611 | EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( |
4607 | &EXT4_SB(sb)->s_freeclusters_counter))); | 4612 | &EXT4_SB(sb)->s_freeclusters_counter))); |
4608 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) | 4613 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) |
4609 | es->s_free_inodes_count = | 4614 | es->s_free_inodes_count = |
4610 | cpu_to_le32(percpu_counter_sum_positive( | 4615 | cpu_to_le32(percpu_counter_sum_positive( |
4611 | &EXT4_SB(sb)->s_freeinodes_counter)); | 4616 | &EXT4_SB(sb)->s_freeinodes_counter)); |
4612 | BUFFER_TRACE(sbh, "marking dirty"); | 4617 | BUFFER_TRACE(sbh, "marking dirty"); |
4613 | ext4_superblock_csum_set(sb); | 4618 | ext4_superblock_csum_set(sb); |
4614 | mark_buffer_dirty(sbh); | 4619 | mark_buffer_dirty(sbh); |
4615 | if (sync) { | 4620 | if (sync) { |
4616 | error = sync_dirty_buffer(sbh); | 4621 | error = sync_dirty_buffer(sbh); |
4617 | if (error) | 4622 | if (error) |
4618 | return error; | 4623 | return error; |
4619 | 4624 | ||
4620 | error = buffer_write_io_error(sbh); | 4625 | error = buffer_write_io_error(sbh); |
4621 | if (error) { | 4626 | if (error) { |
4622 | ext4_msg(sb, KERN_ERR, "I/O error while writing " | 4627 | ext4_msg(sb, KERN_ERR, "I/O error while writing " |
4623 | "superblock"); | 4628 | "superblock"); |
4624 | clear_buffer_write_io_error(sbh); | 4629 | clear_buffer_write_io_error(sbh); |
4625 | set_buffer_uptodate(sbh); | 4630 | set_buffer_uptodate(sbh); |
4626 | } | 4631 | } |
4627 | } | 4632 | } |
4628 | return error; | 4633 | return error; |
4629 | } | 4634 | } |
4630 | 4635 | ||
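The s_kbytes_written update in ext4_commit_super() above adds the sectors written since mount, shifted right by one: a sector is 512 bytes, so two sectors make one kilobyte. A worked example with made-up values:

#include <stdio.h>

int main(void)
{
    unsigned long long kbytes_at_mount = 1000;   /* from the superblock   */
    unsigned long long sectors_start   = 50000;  /* sampled at mount time */
    unsigned long long sectors_now     = 58192;  /* current disk stat     */

    unsigned long long kbytes_written =
        kbytes_at_mount + ((sectors_now - sectors_start) >> 1);

    /* 8192 sectors * 512 B = 4096 KiB -> 1000 + 4096 = 5096 */
    printf("s_kbytes_written = %llu\n", kbytes_written);
    return 0;
}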
4631 | /* | 4636 | /* |
4632 | * Have we just finished recovery? If so, and if we are mounting (or | 4637 | * Have we just finished recovery? If so, and if we are mounting (or |
4633 | * remounting) the filesystem readonly, then we will end up with a | 4638 | * remounting) the filesystem readonly, then we will end up with a |
4634 | * consistent fs on disk. Record that fact. | 4639 | * consistent fs on disk. Record that fact. |
4635 | */ | 4640 | */ |
4636 | static void ext4_mark_recovery_complete(struct super_block *sb, | 4641 | static void ext4_mark_recovery_complete(struct super_block *sb, |
4637 | struct ext4_super_block *es) | 4642 | struct ext4_super_block *es) |
4638 | { | 4643 | { |
4639 | journal_t *journal = EXT4_SB(sb)->s_journal; | 4644 | journal_t *journal = EXT4_SB(sb)->s_journal; |
4640 | 4645 | ||
4641 | if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { | 4646 | if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { |
4642 | BUG_ON(journal != NULL); | 4647 | BUG_ON(journal != NULL); |
4643 | return; | 4648 | return; |
4644 | } | 4649 | } |
4645 | jbd2_journal_lock_updates(journal); | 4650 | jbd2_journal_lock_updates(journal); |
4646 | if (jbd2_journal_flush(journal) < 0) | 4651 | if (jbd2_journal_flush(journal) < 0) |
4647 | goto out; | 4652 | goto out; |
4648 | 4653 | ||
4649 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 4654 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
4650 | sb->s_flags & MS_RDONLY) { | 4655 | sb->s_flags & MS_RDONLY) { |
4651 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4656 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4652 | ext4_commit_super(sb, 1); | 4657 | ext4_commit_super(sb, 1); |
4653 | } | 4658 | } |
4654 | 4659 | ||
4655 | out: | 4660 | out: |
4656 | jbd2_journal_unlock_updates(journal); | 4661 | jbd2_journal_unlock_updates(journal); |
4657 | } | 4662 | } |
4658 | 4663 | ||
4659 | /* | 4664 | /* |
4660 | * If we are mounting (or read-write remounting) a filesystem whose journal | 4665 | * If we are mounting (or read-write remounting) a filesystem whose journal |
4661 | * has recorded an error from a previous lifetime, move that error to the | 4666 | * has recorded an error from a previous lifetime, move that error to the |
4662 | * main filesystem now. | 4667 | * main filesystem now. |
4663 | */ | 4668 | */ |
4664 | static void ext4_clear_journal_err(struct super_block *sb, | 4669 | static void ext4_clear_journal_err(struct super_block *sb, |
4665 | struct ext4_super_block *es) | 4670 | struct ext4_super_block *es) |
4666 | { | 4671 | { |
4667 | journal_t *journal; | 4672 | journal_t *journal; |
4668 | int j_errno; | 4673 | int j_errno; |
4669 | const char *errstr; | 4674 | const char *errstr; |
4670 | 4675 | ||
4671 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); | 4676 | BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); |
4672 | 4677 | ||
4673 | journal = EXT4_SB(sb)->s_journal; | 4678 | journal = EXT4_SB(sb)->s_journal; |
4674 | 4679 | ||
4675 | /* | 4680 | /* |
4676 | * Now check for any error status which may have been recorded in the | 4681 | * Now check for any error status which may have been recorded in the |
4677 | * journal by a prior ext4_error() or ext4_abort() | 4682 | * journal by a prior ext4_error() or ext4_abort() |
4678 | */ | 4683 | */ |
4679 | 4684 | ||
4680 | j_errno = jbd2_journal_errno(journal); | 4685 | j_errno = jbd2_journal_errno(journal); |
4681 | if (j_errno) { | 4686 | if (j_errno) { |
4682 | char nbuf[16]; | 4687 | char nbuf[16]; |
4683 | 4688 | ||
4684 | errstr = ext4_decode_error(sb, j_errno, nbuf); | 4689 | errstr = ext4_decode_error(sb, j_errno, nbuf); |
4685 | ext4_warning(sb, "Filesystem error recorded " | 4690 | ext4_warning(sb, "Filesystem error recorded " |
4686 | "from previous mount: %s", errstr); | 4691 | "from previous mount: %s", errstr); |
4687 | ext4_warning(sb, "Marking fs in need of filesystem check."); | 4692 | ext4_warning(sb, "Marking fs in need of filesystem check."); |
4688 | 4693 | ||
4689 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | 4694 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; |
4690 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | 4695 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); |
4691 | ext4_commit_super(sb, 1); | 4696 | ext4_commit_super(sb, 1); |
4692 | 4697 | ||
4693 | jbd2_journal_clear_err(journal); | 4698 | jbd2_journal_clear_err(journal); |
4694 | jbd2_journal_update_sb_errno(journal); | 4699 | jbd2_journal_update_sb_errno(journal); |
4695 | } | 4700 | } |
4696 | } | 4701 | } |
4697 | 4702 | ||
4698 | /* | 4703 | /* |
4699 | * Force the running and committing transactions to commit, | 4704 | * Force the running and committing transactions to commit, |
4700 | * and wait on the commit. | 4705 | * and wait on the commit. |
4701 | */ | 4706 | */ |
4702 | int ext4_force_commit(struct super_block *sb) | 4707 | int ext4_force_commit(struct super_block *sb) |
4703 | { | 4708 | { |
4704 | journal_t *journal; | 4709 | journal_t *journal; |
4705 | 4710 | ||
4706 | if (sb->s_flags & MS_RDONLY) | 4711 | if (sb->s_flags & MS_RDONLY) |
4707 | return 0; | 4712 | return 0; |
4708 | 4713 | ||
4709 | journal = EXT4_SB(sb)->s_journal; | 4714 | journal = EXT4_SB(sb)->s_journal; |
4710 | return ext4_journal_force_commit(journal); | 4715 | return ext4_journal_force_commit(journal); |
4711 | } | 4716 | } |
4712 | 4717 | ||
4713 | static int ext4_sync_fs(struct super_block *sb, int wait) | 4718 | static int ext4_sync_fs(struct super_block *sb, int wait) |
4714 | { | 4719 | { |
4715 | int ret = 0; | 4720 | int ret = 0; |
4716 | tid_t target; | 4721 | tid_t target; |
4717 | bool needs_barrier = false; | 4722 | bool needs_barrier = false; |
4718 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4723 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4719 | 4724 | ||
4720 | trace_ext4_sync_fs(sb, wait); | 4725 | trace_ext4_sync_fs(sb, wait); |
4721 | flush_workqueue(sbi->rsv_conversion_wq); | 4726 | flush_workqueue(sbi->rsv_conversion_wq); |
4722 | /* | 4727 | /* |
4723 | * Writeback quota in non-journalled quota case - journalled quota has | 4728 | * Writeback quota in non-journalled quota case - journalled quota has |
4724 | * no dirty dquots | 4729 | * no dirty dquots |
4725 | */ | 4730 | */ |
4726 | dquot_writeback_dquots(sb, -1); | 4731 | dquot_writeback_dquots(sb, -1); |
4727 | /* | 4732 | /* |
4728 | * Data writeback is possible w/o journal transaction, so barrier must | 4733 | * Data writeback is possible w/o journal transaction, so barrier must |
4729 | * be sent at the end of the function. But we can skip it if | 4734 | * be sent at the end of the function. But we can skip it if |
4730 | * transaction_commit will do it for us. | 4735 | * transaction_commit will do it for us. |
4731 | */ | 4736 | */ |
4732 | if (sbi->s_journal) { | 4737 | if (sbi->s_journal) { |
4733 | target = jbd2_get_latest_transaction(sbi->s_journal); | 4738 | target = jbd2_get_latest_transaction(sbi->s_journal); |
4734 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && | 4739 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && |
4735 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | 4740 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) |
4736 | needs_barrier = true; | 4741 | needs_barrier = true; |
4737 | 4742 | ||
4738 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | 4743 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { |
4739 | if (wait) | 4744 | if (wait) |
4740 | ret = jbd2_log_wait_commit(sbi->s_journal, | 4745 | ret = jbd2_log_wait_commit(sbi->s_journal, |
4741 | target); | 4746 | target); |
4742 | } | 4747 | } |
4743 | } else if (wait && test_opt(sb, BARRIER)) | 4748 | } else if (wait && test_opt(sb, BARRIER)) |
4744 | needs_barrier = true; | 4749 | needs_barrier = true; |
4745 | if (needs_barrier) { | 4750 | if (needs_barrier) { |
4746 | int err; | 4751 | int err; |
4747 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | 4752 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); |
4748 | if (!ret) | 4753 | if (!ret) |
4749 | ret = err; | 4754 | ret = err; |
4750 | } | 4755 | } |
4751 | 4756 | ||
4752 | return ret; | 4757 | return ret; |
4753 | } | 4758 | } |
4754 | 4759 | ||
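ext4_sync_fs() above issues an explicit cache flush only when all three conditions line up: the caller actually waits, barriers are enabled, and the journal commit will not already send a barrier on its behalf. A sketch of that decision as pure logic, with no block-layer calls:

#include <stdio.h>
#include <stdbool.h>

static bool needs_barrier(bool wait, bool barrier_opt,
                          bool commit_sends_barrier)
{
    return wait && barrier_opt && !commit_sends_barrier;
}

int main(void)
{
    /* wait, barriers on, commit already flushes -> skip the flush */
    printf("%d\n", needs_barrier(true, true, true));    /* 0 */
    /* wait, barriers on, no commit flush -> issue the flush */
    printf("%d\n", needs_barrier(true, true, false));   /* 1 */
    /* async sync (wait == false) never flushes explicitly */
    printf("%d\n", needs_barrier(false, true, false));  /* 0 */
    return 0;
}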
4755 | /* | 4760 | /* |
4756 | * LVM calls this function before a (read-only) snapshot is created. This | 4761 | * LVM calls this function before a (read-only) snapshot is created. This |
4757 | * gives us a chance to flush the journal completely and mark the fs clean. | 4762 | * gives us a chance to flush the journal completely and mark the fs clean. |
4758 | * | 4763 | * |
4759 | * Note that this function alone cannot bring the filesystem to a clean | 4764 | * Note that this function alone cannot bring the filesystem to a clean |
4760 | * state; it relies on the upper layer to stop all data & metadata | 4765 | * state; it relies on the upper layer to stop all data & metadata |
4761 | * modifications. | 4766 | * modifications. |
4762 | */ | 4767 | */ |
4763 | static int ext4_freeze(struct super_block *sb) | 4768 | static int ext4_freeze(struct super_block *sb) |
4764 | { | 4769 | { |
4765 | int error = 0; | 4770 | int error = 0; |
4766 | journal_t *journal; | 4771 | journal_t *journal; |
4767 | 4772 | ||
4768 | if (sb->s_flags & MS_RDONLY) | 4773 | if (sb->s_flags & MS_RDONLY) |
4769 | return 0; | 4774 | return 0; |
4770 | 4775 | ||
4771 | journal = EXT4_SB(sb)->s_journal; | 4776 | journal = EXT4_SB(sb)->s_journal; |
4772 | 4777 | ||
4773 | if (journal) { | 4778 | if (journal) { |
4774 | /* Now we set up the journal barrier. */ | 4779 | /* Now we set up the journal barrier. */ |
4775 | jbd2_journal_lock_updates(journal); | 4780 | jbd2_journal_lock_updates(journal); |
4776 | 4781 | ||
4777 | /* | 4782 | /* |
4778 | * Don't clear the needs_recovery flag if we failed to | 4783 | * Don't clear the needs_recovery flag if we failed to |
4779 | * flush the journal. | 4784 | * flush the journal. |
4780 | */ | 4785 | */ |
4781 | error = jbd2_journal_flush(journal); | 4786 | error = jbd2_journal_flush(journal); |
4782 | if (error < 0) | 4787 | if (error < 0) |
4783 | goto out; | 4788 | goto out; |
4784 | } | 4789 | } |
4785 | 4790 | ||
4786 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 4791 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
4787 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4792 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4788 | error = ext4_commit_super(sb, 1); | 4793 | error = ext4_commit_super(sb, 1); |
4789 | out: | 4794 | out: |
4790 | if (journal) | 4795 | if (journal) |
4791 | /* we rely on upper layer to stop further updates */ | 4796 | /* we rely on upper layer to stop further updates */ |
4792 | jbd2_journal_unlock_updates(journal); | 4797 | jbd2_journal_unlock_updates(journal); |
4793 | return error; | 4798 | return error; |
4794 | } | 4799 | } |
4795 | 4800 | ||
4796 | /* | 4801 | /* |
4797 | * Called by LVM after the snapshot is done. We need to reset the RECOVER | 4802 | * Called by LVM after the snapshot is done. We need to reset the RECOVER |
4798 | * flag here, even though the filesystem is not technically dirty yet. | 4803 | * flag here, even though the filesystem is not technically dirty yet. |
4799 | */ | 4804 | */ |
4800 | static int ext4_unfreeze(struct super_block *sb) | 4805 | static int ext4_unfreeze(struct super_block *sb) |
4801 | { | 4806 | { |
4802 | if (sb->s_flags & MS_RDONLY) | 4807 | if (sb->s_flags & MS_RDONLY) |
4803 | return 0; | 4808 | return 0; |
4804 | 4809 | ||
4805 | /* Reset the needs_recovery flag before the fs is unlocked. */ | 4810 | /* Reset the needs_recovery flag before the fs is unlocked. */ |
4806 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 4811 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
4807 | ext4_commit_super(sb, 1); | 4812 | ext4_commit_super(sb, 1); |
4808 | return 0; | 4813 | return 0; |
4809 | } | 4814 | } |
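
A hedged userspace sketch of how ext4_freeze()/ext4_unfreeze() are reached: the FIFREEZE and FITHAW ioctls enter the VFS freeze_super()/thaw_super(), which call the filesystem's freeze_fs and unfreeze_fs hooks; LVM reaches the same hooks through the block layer when it creates a snapshot. The mount point is illustrative.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FIFREEZE, FITHAW */

int main(void)
{
	int fd = open("/mnt", O_RDONLY);

	if (fd < 0)
		return 1;
	ioctl(fd, FIFREEZE, 0);		/* freeze_super() -> ext4_freeze() */
	/* ... take the read-only snapshot here ... */
	ioctl(fd, FITHAW, 0);		/* thaw_super() -> ext4_unfreeze() */
	close(fd);
	return 0;
}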
4810 | 4815 | ||
4811 | /* | 4816 | /* |
4812 | * Structure to save mount options for ext4_remount's benefit | 4817 | * Structure to save mount options for ext4_remount's benefit |
4813 | */ | 4818 | */ |
4814 | struct ext4_mount_options { | 4819 | struct ext4_mount_options { |
4815 | unsigned long s_mount_opt; | 4820 | unsigned long s_mount_opt; |
4816 | unsigned long s_mount_opt2; | 4821 | unsigned long s_mount_opt2; |
4817 | kuid_t s_resuid; | 4822 | kuid_t s_resuid; |
4818 | kgid_t s_resgid; | 4823 | kgid_t s_resgid; |
4819 | unsigned long s_commit_interval; | 4824 | unsigned long s_commit_interval; |
4820 | u32 s_min_batch_time, s_max_batch_time; | 4825 | u32 s_min_batch_time, s_max_batch_time; |
4821 | #ifdef CONFIG_QUOTA | 4826 | #ifdef CONFIG_QUOTA |
4822 | int s_jquota_fmt; | 4827 | int s_jquota_fmt; |
4823 | char *s_qf_names[EXT4_MAXQUOTAS]; | 4828 | char *s_qf_names[EXT4_MAXQUOTAS]; |
4824 | #endif | 4829 | #endif |
4825 | }; | 4830 | }; |
4826 | 4831 | ||
4827 | static int ext4_remount(struct super_block *sb, int *flags, char *data) | 4832 | static int ext4_remount(struct super_block *sb, int *flags, char *data) |
4828 | { | 4833 | { |
4829 | struct ext4_super_block *es; | 4834 | struct ext4_super_block *es; |
4830 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4835 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4831 | unsigned long old_sb_flags; | 4836 | unsigned long old_sb_flags; |
4832 | struct ext4_mount_options old_opts; | 4837 | struct ext4_mount_options old_opts; |
4833 | int enable_quota = 0; | 4838 | int enable_quota = 0; |
4834 | ext4_group_t g; | 4839 | ext4_group_t g; |
4835 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 4840 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
4836 | int err = 0; | 4841 | int err = 0; |
4837 | #ifdef CONFIG_QUOTA | 4842 | #ifdef CONFIG_QUOTA |
4838 | int i, j; | 4843 | int i, j; |
4839 | #endif | 4844 | #endif |
4840 | char *orig_data = kstrdup(data, GFP_KERNEL); | 4845 | char *orig_data = kstrdup(data, GFP_KERNEL); |
4841 | 4846 | ||
4842 | /* Store the original options */ | 4847 | /* Store the original options */ |
4843 | old_sb_flags = sb->s_flags; | 4848 | old_sb_flags = sb->s_flags; |
4844 | old_opts.s_mount_opt = sbi->s_mount_opt; | 4849 | old_opts.s_mount_opt = sbi->s_mount_opt; |
4845 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; | 4850 | old_opts.s_mount_opt2 = sbi->s_mount_opt2; |
4846 | old_opts.s_resuid = sbi->s_resuid; | 4851 | old_opts.s_resuid = sbi->s_resuid; |
4847 | old_opts.s_resgid = sbi->s_resgid; | 4852 | old_opts.s_resgid = sbi->s_resgid; |
4848 | old_opts.s_commit_interval = sbi->s_commit_interval; | 4853 | old_opts.s_commit_interval = sbi->s_commit_interval; |
4849 | old_opts.s_min_batch_time = sbi->s_min_batch_time; | 4854 | old_opts.s_min_batch_time = sbi->s_min_batch_time; |
4850 | old_opts.s_max_batch_time = sbi->s_max_batch_time; | 4855 | old_opts.s_max_batch_time = sbi->s_max_batch_time; |
4851 | #ifdef CONFIG_QUOTA | 4856 | #ifdef CONFIG_QUOTA |
4852 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; | 4857 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; |
4853 | for (i = 0; i < EXT4_MAXQUOTAS; i++) | 4858 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
4854 | if (sbi->s_qf_names[i]) { | 4859 | if (sbi->s_qf_names[i]) { |
4855 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], | 4860 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], |
4856 | GFP_KERNEL); | 4861 | GFP_KERNEL); |
4857 | if (!old_opts.s_qf_names[i]) { | 4862 | if (!old_opts.s_qf_names[i]) { |
4858 | for (j = 0; j < i; j++) | 4863 | for (j = 0; j < i; j++) |
4859 | kfree(old_opts.s_qf_names[j]); | 4864 | kfree(old_opts.s_qf_names[j]); |
4860 | kfree(orig_data); | 4865 | kfree(orig_data); |
4861 | return -ENOMEM; | 4866 | return -ENOMEM; |
4862 | } | 4867 | } |
4863 | } else | 4868 | } else |
4864 | old_opts.s_qf_names[i] = NULL; | 4869 | old_opts.s_qf_names[i] = NULL; |
4865 | #endif | 4870 | #endif |
4866 | if (sbi->s_journal && sbi->s_journal->j_task->io_context) | 4871 | if (sbi->s_journal && sbi->s_journal->j_task->io_context) |
4867 | journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; | 4872 | journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; |
4868 | 4873 | ||
4869 | /* | ||
4870 | * Allow the "check" option to be passed as a remount option. | ||
4871 | */ | ||
4872 | if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { | 4874 | if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { |
4873 | err = -EINVAL; | 4875 | err = -EINVAL; |
4874 | goto restore_opts; | 4876 | goto restore_opts; |
4875 | } | 4877 | } |
4876 | 4878 | ||
4877 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | 4879 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ |
4878 | test_opt(sb, JOURNAL_CHECKSUM)) { | 4880 | test_opt(sb, JOURNAL_CHECKSUM)) { |
4879 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " | 4881 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " |
4880 | "during remount not supported"); | 4882 | "during remount not supported; ignoring"); |
4881 | err = -EINVAL; | 4883 | sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM; |
4882 | goto restore_opts; | ||
4883 | } | 4884 | } |
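
The replacement error path above downgrades the old hard failure (-EINVAL) to a warning and keeps the remount going: parse_options() has already toggled the JOURNAL_CHECKSUM bit, and the ^= flips it straight back. A distilled, hedged sketch of the XOR-revert idiom (names hypothetical): since x ^ m ^ m == x, undoing an unsupported option toggle is a single XOR with the same mask.

/* Undo a previous 'opts ^= mask' toggle; XORing with the same
 * mask twice restores the original value. */
static unsigned long revert_toggle(unsigned long opts, unsigned long mask)
{
	return opts ^ mask;
}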
4884 | 4885 | ||
4885 | if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ | ||
4886 | test_opt(sb, JOURNAL_CHECKSUM)) { | ||
4887 | ext4_msg(sb, KERN_ERR, "changing journal_checksum " | ||
4888 | "during remount not supported"); | ||
4889 | err = -EINVAL; | ||
4890 | goto restore_opts; | ||
4891 | } | ||
4892 | |||
4893 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | 4886 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { |
4894 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { | 4887 | if (test_opt2(sb, EXPLICIT_DELALLOC)) { |
4895 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4888 | ext4_msg(sb, KERN_ERR, "can't mount with " |
4896 | "both data=journal and delalloc"); | 4889 | "both data=journal and delalloc"); |
4897 | err = -EINVAL; | 4890 | err = -EINVAL; |
4898 | goto restore_opts; | 4891 | goto restore_opts; |
4899 | } | 4892 | } |
4900 | if (test_opt(sb, DIOREAD_NOLOCK)) { | 4893 | if (test_opt(sb, DIOREAD_NOLOCK)) { |
4901 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4894 | ext4_msg(sb, KERN_ERR, "can't mount with " |
4902 | "both data=journal and dioread_nolock"); | 4895 | "both data=journal and dioread_nolock"); |
4903 | err = -EINVAL; | 4896 | err = -EINVAL; |
4904 | goto restore_opts; | 4897 | goto restore_opts; |
4905 | } | 4898 | } |
4906 | if (test_opt(sb, DAX)) { | 4899 | if (test_opt(sb, DAX)) { |
4907 | ext4_msg(sb, KERN_ERR, "can't mount with " | 4900 | ext4_msg(sb, KERN_ERR, "can't mount with " |
4908 | "both data=journal and dax"); | 4901 | "both data=journal and dax"); |
4909 | err = -EINVAL; | 4902 | err = -EINVAL; |
4910 | goto restore_opts; | 4903 | goto restore_opts; |
4911 | } | 4904 | } |
4912 | } | 4905 | } |
4913 | 4906 | ||
4914 | if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { | 4907 | if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { |
4915 | ext4_msg(sb, KERN_WARNING, "warning: refusing change of " | 4908 | ext4_msg(sb, KERN_WARNING, "warning: refusing change of " |
4916 | "dax flag with busy inodes while remounting"); | 4909 | "dax flag with busy inodes while remounting"); |
4917 | sbi->s_mount_opt ^= EXT4_MOUNT_DAX; | 4910 | sbi->s_mount_opt ^= EXT4_MOUNT_DAX; |
4918 | } | 4911 | } |
4919 | 4912 | ||
4920 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) | 4913 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) |
4921 | ext4_abort(sb, "Abort forced by user"); | 4914 | ext4_abort(sb, "Abort forced by user"); |
4922 | 4915 | ||
4923 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 4916 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
4924 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 4917 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
4925 | 4918 | ||
4926 | es = sbi->s_es; | 4919 | es = sbi->s_es; |
4927 | 4920 | ||
4928 | if (sbi->s_journal) { | 4921 | if (sbi->s_journal) { |
4929 | ext4_init_journal_params(sb, sbi->s_journal); | 4922 | ext4_init_journal_params(sb, sbi->s_journal); |
4930 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 4923 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
4931 | } | 4924 | } |
4932 | 4925 | ||
4933 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { | 4926 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { |
4934 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { | 4927 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { |
4935 | err = -EROFS; | 4928 | err = -EROFS; |
4936 | goto restore_opts; | 4929 | goto restore_opts; |
4937 | } | 4930 | } |
4938 | 4931 | ||
4939 | if (*flags & MS_RDONLY) { | 4932 | if (*flags & MS_RDONLY) { |
4940 | err = sync_filesystem(sb); | 4933 | err = sync_filesystem(sb); |
4941 | if (err < 0) | 4934 | if (err < 0) |
4942 | goto restore_opts; | 4935 | goto restore_opts; |
4943 | err = dquot_suspend(sb, -1); | 4936 | err = dquot_suspend(sb, -1); |
4944 | if (err < 0) | 4937 | if (err < 0) |
4945 | goto restore_opts; | 4938 | goto restore_opts; |
4946 | 4939 | ||
4947 | /* | 4940 | /* |
4948 | * First of all, the unconditional stuff we have to do | 4941 | * First of all, the unconditional stuff we have to do |
4949 | * to disable replay of the journal when we next remount | 4942 | * to disable replay of the journal when we next remount |
4950 | */ | 4943 | */ |
4951 | sb->s_flags |= MS_RDONLY; | 4944 | sb->s_flags |= MS_RDONLY; |
4952 | 4945 | ||
4953 | /* | 4946 | /* |
4954 | * OK, test if we are remounting a valid rw partition | 4947 | * OK, test if we are remounting a valid rw partition |
4955 | * readonly, and if so set the rdonly flag and then | 4948 | * readonly, and if so set the rdonly flag and then |
4956 | * mark the partition as valid again. | 4949 | * mark the partition as valid again. |
4957 | */ | 4950 | */ |
4958 | if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && | 4951 | if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && |
4959 | (sbi->s_mount_state & EXT4_VALID_FS)) | 4952 | (sbi->s_mount_state & EXT4_VALID_FS)) |
4960 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 4953 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
4961 | 4954 | ||
4962 | if (sbi->s_journal) | 4955 | if (sbi->s_journal) |
4963 | ext4_mark_recovery_complete(sb, es); | 4956 | ext4_mark_recovery_complete(sb, es); |
4964 | } else { | 4957 | } else { |
4965 | /* Make sure we can mount this feature set readwrite */ | 4958 | /* Make sure we can mount this feature set readwrite */ |
4966 | if (!ext4_feature_set_ok(sb, 0)) { | 4959 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
4960 | EXT4_FEATURE_RO_COMPAT_READONLY) || | ||
4961 | !ext4_feature_set_ok(sb, 0)) { | ||
4967 | err = -EROFS; | 4962 | err = -EROFS; |
4968 | goto restore_opts; | 4963 | goto restore_opts; |
4969 | } | 4964 | } |
4970 | /* | 4965 | /* |
4971 | * Make sure the group descriptor checksums | 4966 | * Make sure the group descriptor checksums |
4972 | * are sane. If they aren't, refuse to remount r/w. | 4967 | * are sane. If they aren't, refuse to remount r/w. |
4973 | */ | 4968 | */ |
4974 | for (g = 0; g < sbi->s_groups_count; g++) { | 4969 | for (g = 0; g < sbi->s_groups_count; g++) { |
4975 | struct ext4_group_desc *gdp = | 4970 | struct ext4_group_desc *gdp = |
4976 | ext4_get_group_desc(sb, g, NULL); | 4971 | ext4_get_group_desc(sb, g, NULL); |
4977 | 4972 | ||
4978 | if (!ext4_group_desc_csum_verify(sb, g, gdp)) { | 4973 | if (!ext4_group_desc_csum_verify(sb, g, gdp)) { |
4979 | ext4_msg(sb, KERN_ERR, | 4974 | ext4_msg(sb, KERN_ERR, |
4980 | "ext4_remount: Checksum for group %u failed (%u!=%u)", | 4975 | "ext4_remount: Checksum for group %u failed (%u!=%u)", |
4981 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), | 4976 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), |
4982 | le16_to_cpu(gdp->bg_checksum)); | 4977 | le16_to_cpu(gdp->bg_checksum)); |
4983 | err = -EINVAL; | 4978 | err = -EINVAL; |
4984 | goto restore_opts; | 4979 | goto restore_opts; |
4985 | } | 4980 | } |
4986 | } | 4981 | } |
4987 | 4982 | ||
4988 | /* | 4983 | /* |
4989 | * If we have an unprocessed orphan list hanging | 4984 | * If we have an unprocessed orphan list hanging |
4990 | * around from a previously readonly bdev mount, | 4985 | * around from a previously readonly bdev mount, |
4991 | * require a full umount/remount for now. | 4986 | * require a full umount/remount for now. |
4992 | */ | 4987 | */ |
4993 | if (es->s_last_orphan) { | 4988 | if (es->s_last_orphan) { |
4994 | ext4_msg(sb, KERN_WARNING, "Couldn't " | 4989 | ext4_msg(sb, KERN_WARNING, "Couldn't " |
4995 | "remount RDWR because of unprocessed " | 4990 | "remount RDWR because of unprocessed " |
4996 | "orphan inode list. Please " | 4991 | "orphan inode list. Please " |
4997 | "umount/remount instead"); | 4992 | "umount/remount instead"); |
4998 | err = -EINVAL; | 4993 | err = -EINVAL; |
4999 | goto restore_opts; | 4994 | goto restore_opts; |
5000 | } | 4995 | } |
5001 | 4996 | ||
5002 | /* | 4997 | /* |
5003 | * Mounting a RDONLY partition read-write, so reread | 4998 | * Mounting a RDONLY partition read-write, so reread |
5004 | * and store the current valid flag. (It may have | 4999 | * and store the current valid flag. (It may have |
5005 | * been changed by e2fsck since we originally mounted | 5000 | * been changed by e2fsck since we originally mounted |
5006 | * the partition.) | 5001 | * the partition.) |
5007 | */ | 5002 | */ |
5008 | if (sbi->s_journal) | 5003 | if (sbi->s_journal) |
5009 | ext4_clear_journal_err(sb, es); | 5004 | ext4_clear_journal_err(sb, es); |
5010 | sbi->s_mount_state = le16_to_cpu(es->s_state); | 5005 | sbi->s_mount_state = le16_to_cpu(es->s_state); |
5011 | if (!ext4_setup_super(sb, es, 0)) | 5006 | if (!ext4_setup_super(sb, es, 0)) |
5012 | sb->s_flags &= ~MS_RDONLY; | 5007 | sb->s_flags &= ~MS_RDONLY; |
5013 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | 5008 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, |
5014 | EXT4_FEATURE_INCOMPAT_MMP)) | 5009 | EXT4_FEATURE_INCOMPAT_MMP)) |
5015 | if (ext4_multi_mount_protect(sb, | 5010 | if (ext4_multi_mount_protect(sb, |
5016 | le64_to_cpu(es->s_mmp_block))) { | 5011 | le64_to_cpu(es->s_mmp_block))) { |
5017 | err = -EROFS; | 5012 | err = -EROFS; |
5018 | goto restore_opts; | 5013 | goto restore_opts; |
5019 | } | 5014 | } |
5020 | enable_quota = 1; | 5015 | enable_quota = 1; |
5021 | } | 5016 | } |
5022 | } | 5017 | } |
5023 | 5018 | ||
5024 | /* | 5019 | /* |
5025 | * Reinitialize lazy itable initialization thread based on | 5020 | * Reinitialize lazy itable initialization thread based on |
5026 | * current settings | 5021 | * current settings |
5027 | */ | 5022 | */ |
5028 | if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) | 5023 | if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE)) |
5029 | ext4_unregister_li_request(sb); | 5024 | ext4_unregister_li_request(sb); |
5030 | else { | 5025 | else { |
5031 | ext4_group_t first_not_zeroed; | 5026 | ext4_group_t first_not_zeroed; |
5032 | first_not_zeroed = ext4_has_uninit_itable(sb); | 5027 | first_not_zeroed = ext4_has_uninit_itable(sb); |
5033 | ext4_register_li_request(sb, first_not_zeroed); | 5028 | ext4_register_li_request(sb, first_not_zeroed); |
5034 | } | 5029 | } |
5035 | 5030 | ||
5036 | ext4_setup_system_zone(sb); | 5031 | ext4_setup_system_zone(sb); |
5037 | if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) | 5032 | if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) |
5038 | ext4_commit_super(sb, 1); | 5033 | ext4_commit_super(sb, 1); |
5039 | 5034 | ||
5040 | #ifdef CONFIG_QUOTA | 5035 | #ifdef CONFIG_QUOTA |
5041 | /* Release old quota file names */ | 5036 | /* Release old quota file names */ |
5042 | for (i = 0; i < EXT4_MAXQUOTAS; i++) | 5037 | for (i = 0; i < EXT4_MAXQUOTAS; i++) |
5043 | kfree(old_opts.s_qf_names[i]); | 5038 | kfree(old_opts.s_qf_names[i]); |
5044 | if (enable_quota) { | 5039 | if (enable_quota) { |
5045 | if (sb_any_quota_suspended(sb)) | 5040 | if (sb_any_quota_suspended(sb)) |
5046 | dquot_resume(sb, -1); | 5041 | dquot_resume(sb, -1); |
5047 | else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 5042 | else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
5048 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { | 5043 | EXT4_FEATURE_RO_COMPAT_QUOTA)) { |
5049 | err = ext4_enable_quotas(sb); | 5044 | err = ext4_enable_quotas(sb); |
5050 | if (err) | 5045 | if (err) |
5051 | goto restore_opts; | 5046 | goto restore_opts; |
5052 | } | 5047 | } |
5053 | } | 5048 | } |
5054 | #endif | 5049 | #endif |
5055 | 5050 | ||
5056 | *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); | 5051 | *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); |
5057 | ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); | 5052 | ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); |
5058 | kfree(orig_data); | 5053 | kfree(orig_data); |
5059 | return 0; | 5054 | return 0; |
5060 | 5055 | ||
5061 | restore_opts: | 5056 | restore_opts: |
5062 | sb->s_flags = old_sb_flags; | 5057 | sb->s_flags = old_sb_flags; |
5063 | sbi->s_mount_opt = old_opts.s_mount_opt; | 5058 | sbi->s_mount_opt = old_opts.s_mount_opt; |
5064 | sbi->s_mount_opt2 = old_opts.s_mount_opt2; | 5059 | sbi->s_mount_opt2 = old_opts.s_mount_opt2; |
5065 | sbi->s_resuid = old_opts.s_resuid; | 5060 | sbi->s_resuid = old_opts.s_resuid; |
5066 | sbi->s_resgid = old_opts.s_resgid; | 5061 | sbi->s_resgid = old_opts.s_resgid; |
5067 | sbi->s_commit_interval = old_opts.s_commit_interval; | 5062 | sbi->s_commit_interval = old_opts.s_commit_interval; |
5068 | sbi->s_min_batch_time = old_opts.s_min_batch_time; | 5063 | sbi->s_min_batch_time = old_opts.s_min_batch_time; |
5069 | sbi->s_max_batch_time = old_opts.s_max_batch_time; | 5064 | sbi->s_max_batch_time = old_opts.s_max_batch_time; |
5070 | #ifdef CONFIG_QUOTA | 5065 | #ifdef CONFIG_QUOTA |
5071 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; | 5066 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; |
5072 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { | 5067 | for (i = 0; i < EXT4_MAXQUOTAS; i++) { |
5073 | kfree(sbi->s_qf_names[i]); | 5068 | kfree(sbi->s_qf_names[i]); |
5074 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 5069 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
5075 | } | 5070 | } |
5076 | #endif | 5071 | #endif |
5077 | kfree(orig_data); | 5072 | kfree(orig_data); |
5078 | return err; | 5073 | return err; |
5079 | } | 5074 | } |
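
For reference, a hedged sketch of the userspace entry into this function: mount(2) with MS_REMOUNT is routed by the VFS to the filesystem's remount_fs method, i.e. ext4_remount() here. Device and mount point are illustrative.

#include <sys/mount.h>

int main(void)
{
	/* remount read-only; the kernel ends up in ext4_remount() */
	return mount("/dev/sdb1", "/mnt", "ext4",
		     MS_REMOUNT | MS_RDONLY, "");
}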
5080 | 5075 | ||
5081 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | 5076 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) |
5082 | { | 5077 | { |
5083 | struct super_block *sb = dentry->d_sb; | 5078 | struct super_block *sb = dentry->d_sb; |
5084 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 5079 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
5085 | struct ext4_super_block *es = sbi->s_es; | 5080 | struct ext4_super_block *es = sbi->s_es; |
5086 | ext4_fsblk_t overhead = 0, resv_blocks; | 5081 | ext4_fsblk_t overhead = 0, resv_blocks; |
5087 | u64 fsid; | 5082 | u64 fsid; |
5088 | s64 bfree; | 5083 | s64 bfree; |
5089 | resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); | 5084 | resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); |
5090 | 5085 | ||
5091 | if (!test_opt(sb, MINIX_DF)) | 5086 | if (!test_opt(sb, MINIX_DF)) |
5092 | overhead = sbi->s_overhead; | 5087 | overhead = sbi->s_overhead; |
5093 | 5088 | ||
5094 | buf->f_type = EXT4_SUPER_MAGIC; | 5089 | buf->f_type = EXT4_SUPER_MAGIC; |
5095 | buf->f_bsize = sb->s_blocksize; | 5090 | buf->f_bsize = sb->s_blocksize; |
5096 | buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead); | 5091 | buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead); |
5097 | bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - | 5092 | bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - |
5098 | percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); | 5093 | percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); |
5099 | /* prevent underflow in case little free space is available */ | 5094 | /* prevent underflow in case little free space is available */ |
5100 | buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); | 5095 | buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); |
5101 | buf->f_bavail = buf->f_bfree - | 5096 | buf->f_bavail = buf->f_bfree - |
5102 | (ext4_r_blocks_count(es) + resv_blocks); | 5097 | (ext4_r_blocks_count(es) + resv_blocks); |
5103 | if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks)) | 5098 | if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks)) |
5104 | buf->f_bavail = 0; | 5099 | buf->f_bavail = 0; |
5105 | buf->f_files = le32_to_cpu(es->s_inodes_count); | 5100 | buf->f_files = le32_to_cpu(es->s_inodes_count); |
5106 | buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); | 5101 | buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); |
5107 | buf->f_namelen = EXT4_NAME_LEN; | 5102 | buf->f_namelen = EXT4_NAME_LEN; |
5108 | fsid = le64_to_cpup((void *)es->s_uuid) ^ | 5103 | fsid = le64_to_cpup((void *)es->s_uuid) ^ |
5109 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); | 5104 | le64_to_cpup((void *)es->s_uuid + sizeof(u64)); |
5110 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; | 5105 | buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; |
5111 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; | 5106 | buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; |
5112 | 5107 | ||
5113 | return 0; | 5108 | return 0; |
5114 | } | 5109 | } |
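
A small userspace check of what ext4_statfs() reports, assuming an ext4 filesystem mounted at /mnt: f_type comes back as EXT4_SUPER_MAGIC (0xEF53), and f_bavail is f_bfree minus both the root-reserved blocks and ext4's internal reserved clusters, clamped at zero.

#include <stdio.h>
#include <sys/vfs.h>

int main(void)
{
	struct statfs st;

	if (statfs("/mnt", &st) != 0)
		return 1;
	printf("type=0x%lx bfree=%llu bavail=%llu\n",
	       (unsigned long)st.f_type,
	       (unsigned long long)st.f_bfree,
	       (unsigned long long)st.f_bavail);
	return 0;
}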
5115 | 5110 | ||
5116 | /* Helper function for writing quotas on sync - we need to start a transaction | 5111 | /* Helper function for writing quotas on sync - we need to start a transaction |
5117 | * before the quota file is locked for write. Otherwise deadlocks are possible: | 5112 | * before the quota file is locked for write. Otherwise deadlocks are possible: |
5118 | * Process 1 Process 2 | 5113 | * Process 1 Process 2 |
5119 | * ext4_create() quota_sync() | 5114 | * ext4_create() quota_sync() |
5120 | * jbd2_journal_start() write_dquot() | 5115 | * jbd2_journal_start() write_dquot() |
5121 | * dquot_initialize() down(dqio_mutex) | 5116 | * dquot_initialize() down(dqio_mutex) |
5122 | * down(dqio_mutex) jbd2_journal_start() | 5117 | * down(dqio_mutex) jbd2_journal_start() |
5123 | * | 5118 | * |
5124 | */ | 5119 | */ |
5125 | 5120 | ||
5126 | #ifdef CONFIG_QUOTA | 5121 | #ifdef CONFIG_QUOTA |
5127 | 5122 | ||
5128 | static inline struct inode *dquot_to_inode(struct dquot *dquot) | 5123 | static inline struct inode *dquot_to_inode(struct dquot *dquot) |
5129 | { | 5124 | { |
5130 | return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; | 5125 | return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; |
5131 | } | 5126 | } |
5132 | 5127 | ||
5133 | static int ext4_write_dquot(struct dquot *dquot) | 5128 | static int ext4_write_dquot(struct dquot *dquot) |
5134 | { | 5129 | { |
5135 | int ret, err; | 5130 | int ret, err; |
5136 | handle_t *handle; | 5131 | handle_t *handle; |
5137 | struct inode *inode; | 5132 | struct inode *inode; |
5138 | 5133 | ||
5139 | inode = dquot_to_inode(dquot); | 5134 | inode = dquot_to_inode(dquot); |
5140 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, | 5135 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, |
5141 | EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); | 5136 | EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); |
5142 | if (IS_ERR(handle)) | 5137 | if (IS_ERR(handle)) |
5143 | return PTR_ERR(handle); | 5138 | return PTR_ERR(handle); |
5144 | ret = dquot_commit(dquot); | 5139 | ret = dquot_commit(dquot); |
5145 | err = ext4_journal_stop(handle); | 5140 | err = ext4_journal_stop(handle); |
5146 | if (!ret) | 5141 | if (!ret) |
5147 | ret = err; | 5142 | ret = err; |
5148 | return ret; | 5143 | return ret; |
5149 | } | 5144 | } |
5150 | 5145 | ||
5151 | static int ext4_acquire_dquot(struct dquot *dquot) | 5146 | static int ext4_acquire_dquot(struct dquot *dquot) |
5152 | { | 5147 | { |
5153 | int ret, err; | 5148 | int ret, err; |
5154 | handle_t *handle; | 5149 | handle_t *handle; |
5155 | 5150 | ||
5156 | handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, | 5151 | handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, |
5157 | EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); | 5152 | EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); |
5158 | if (IS_ERR(handle)) | 5153 | if (IS_ERR(handle)) |
5159 | return PTR_ERR(handle); | 5154 | return PTR_ERR(handle); |
5160 | ret = dquot_acquire(dquot); | 5155 | ret = dquot_acquire(dquot); |
5161 | err = ext4_journal_stop(handle); | 5156 | err = ext4_journal_stop(handle); |
5162 | if (!ret) | 5157 | if (!ret) |
5163 | ret = err; | 5158 | ret = err; |
5164 | return ret; | 5159 | return ret; |
5165 | } | 5160 | } |
5166 | 5161 | ||
5167 | static int ext4_release_dquot(struct dquot *dquot) | 5162 | static int ext4_release_dquot(struct dquot *dquot) |
5168 | { | 5163 | { |
5169 | int ret, err; | 5164 | int ret, err; |
5170 | handle_t *handle; | 5165 | handle_t *handle; |
5171 | 5166 | ||
5172 | handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, | 5167 | handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA, |
5173 | EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); | 5168 | EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); |
5174 | if (IS_ERR(handle)) { | 5169 | if (IS_ERR(handle)) { |
5175 | /* Release dquot anyway to avoid endless cycle in dqput() */ | 5170 | /* Release dquot anyway to avoid endless cycle in dqput() */ |
5176 | dquot_release(dquot); | 5171 | dquot_release(dquot); |
5177 | return PTR_ERR(handle); | 5172 | return PTR_ERR(handle); |
5178 | } | 5173 | } |
5179 | ret = dquot_release(dquot); | 5174 | ret = dquot_release(dquot); |
5180 | err = ext4_journal_stop(handle); | 5175 | err = ext4_journal_stop(handle); |
5181 | if (!ret) | 5176 | if (!ret) |
5182 | ret = err; | 5177 | ret = err; |
5183 | return ret; | 5178 | return ret; |
5184 | } | 5179 | } |
5185 | 5180 | ||
5186 | static int ext4_mark_dquot_dirty(struct dquot *dquot) | 5181 | static int ext4_mark_dquot_dirty(struct dquot *dquot) |
5187 | { | 5182 | { |
5188 | struct super_block *sb = dquot->dq_sb; | 5183 | struct super_block *sb = dquot->dq_sb; |
5189 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 5184 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
5190 | 5185 | ||
5191 | /* Are we journaling quotas? */ | 5186 | /* Are we journaling quotas? */ |
5192 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || | 5187 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || |
5193 | sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { | 5188 | sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { |
5194 | dquot_mark_dquot_dirty(dquot); | 5189 | dquot_mark_dquot_dirty(dquot); |
5195 | return ext4_write_dquot(dquot); | 5190 | return ext4_write_dquot(dquot); |
5196 | } else { | 5191 | } else { |
5197 | return dquot_mark_dquot_dirty(dquot); | 5192 | return dquot_mark_dquot_dirty(dquot); |
5198 | } | 5193 | } |
5199 | } | 5194 | } |
5200 | 5195 | ||
5201 | static int ext4_write_info(struct super_block *sb, int type) | 5196 | static int ext4_write_info(struct super_block *sb, int type) |
5202 | { | 5197 | { |
5203 | int ret, err; | 5198 | int ret, err; |
5204 | handle_t *handle; | 5199 | handle_t *handle; |
5205 | 5200 | ||
5206 | /* Data block + inode block */ | 5201 | /* Data block + inode block */ |
5207 | handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2); | 5202 | handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2); |
5208 | if (IS_ERR(handle)) | 5203 | if (IS_ERR(handle)) |
5209 | return PTR_ERR(handle); | 5204 | return PTR_ERR(handle); |
5210 | ret = dquot_commit_info(sb, type); | 5205 | ret = dquot_commit_info(sb, type); |
5211 | err = ext4_journal_stop(handle); | 5206 | err = ext4_journal_stop(handle); |
5212 | if (!ret) | 5207 | if (!ret) |
5213 | ret = err; | 5208 | ret = err; |
5214 | return ret; | 5209 | return ret; |
5215 | } | 5210 | } |
5216 | 5211 | ||
5217 | /* | 5212 | /* |
5218 | * Turn on quotas during mount time - we need to find | 5213 | * Turn on quotas during mount time - we need to find |
5219 | * the quota file and such... | 5214 | * the quota file and such... |
5220 | */ | 5215 | */ |
5221 | static int ext4_quota_on_mount(struct super_block *sb, int type) | 5216 | static int ext4_quota_on_mount(struct super_block *sb, int type) |
5222 | { | 5217 | { |
5223 | return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], | 5218 | return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], |
5224 | EXT4_SB(sb)->s_jquota_fmt, type); | 5219 | EXT4_SB(sb)->s_jquota_fmt, type); |
5225 | } | 5220 | } |
5226 | 5221 | ||
5227 | /* | 5222 | /* |
5228 | * Standard function to be called on quota_on | 5223 | * Standard function to be called on quota_on |
5229 | */ | 5224 | */ |
5230 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 5225 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
5231 | struct path *path) | 5226 | struct path *path) |
5232 | { | 5227 | { |
5233 | int err; | 5228 | int err; |
5234 | 5229 | ||
5235 | if (!test_opt(sb, QUOTA)) | 5230 | if (!test_opt(sb, QUOTA)) |
5236 | return -EINVAL; | 5231 | return -EINVAL; |
5237 | 5232 | ||
5238 | /* Quotafile not on the same filesystem? */ | 5233 | /* Quotafile not on the same filesystem? */ |
5239 | if (path->dentry->d_sb != sb) | 5234 | if (path->dentry->d_sb != sb) |
5240 | return -EXDEV; | 5235 | return -EXDEV; |
5241 | /* Journaling quota? */ | 5236 | /* Journaling quota? */ |
5242 | if (EXT4_SB(sb)->s_qf_names[type]) { | 5237 | if (EXT4_SB(sb)->s_qf_names[type]) { |
5243 | /* Quotafile not in fs root? */ | 5238 | /* Quotafile not in fs root? */ |
5244 | if (path->dentry->d_parent != sb->s_root) | 5239 | if (path->dentry->d_parent != sb->s_root) |
5245 | ext4_msg(sb, KERN_WARNING, | 5240 | ext4_msg(sb, KERN_WARNING, |
5246 | "Quota file not on filesystem root. " | 5241 | "Quota file not on filesystem root. " |
5247 | "Journaled quota will not work"); | 5242 | "Journaled quota will not work"); |
5248 | } | 5243 | } |
5249 | 5244 | ||
5250 | /* | 5245 | /* |
5251 | * When we journal data on the quota file, we have to flush the journal to see | 5246 | * When we journal data on the quota file, we have to flush the journal to see |
5252 | * all updates to the file when we bypass pagecache... | 5247 | * all updates to the file when we bypass pagecache... |
5253 | */ | 5248 | */ |
5254 | if (EXT4_SB(sb)->s_journal && | 5249 | if (EXT4_SB(sb)->s_journal && |
5255 | ext4_should_journal_data(path->dentry->d_inode)) { | 5250 | ext4_should_journal_data(path->dentry->d_inode)) { |
5256 | /* | 5251 | /* |
5257 | * We don't need to lock updates but journal_flush() could | 5252 | * We don't need to lock updates but journal_flush() could |
5258 | * otherwise be livelocked... | 5253 | * otherwise be livelocked... |
5259 | */ | 5254 | */ |
5260 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 5255 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
5261 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 5256 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
5262 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 5257 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
5263 | if (err) | 5258 | if (err) |
5264 | return err; | 5259 | return err; |
5265 | } | 5260 | } |
5266 | 5261 | ||
5267 | return dquot_quota_on(sb, type, format_id, path); | 5262 | return dquot_quota_on(sb, type, format_id, path); |
5268 | } | 5263 | } |
5269 | 5264 | ||
5270 | static int ext4_quota_enable(struct super_block *sb, int type, int format_id, | 5265 | static int ext4_quota_enable(struct super_block *sb, int type, int format_id, |
5271 | unsigned int flags) | 5266 | unsigned int flags) |
5272 | { | 5267 | { |
5273 | int err; | 5268 | int err; |
5274 | struct inode *qf_inode; | 5269 | struct inode *qf_inode; |
5275 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { | 5270 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
5276 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5271 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
5277 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5272 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) |
5278 | }; | 5273 | }; |
5279 | 5274 | ||
5280 | BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); | 5275 | BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); |
5281 | 5276 | ||
5282 | if (!qf_inums[type]) | 5277 | if (!qf_inums[type]) |
5283 | return -EPERM; | 5278 | return -EPERM; |
5284 | 5279 | ||
5285 | qf_inode = ext4_iget(sb, qf_inums[type]); | 5280 | qf_inode = ext4_iget(sb, qf_inums[type]); |
5286 | if (IS_ERR(qf_inode)) { | 5281 | if (IS_ERR(qf_inode)) { |
5287 | ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); | 5282 | ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); |
5288 | return PTR_ERR(qf_inode); | 5283 | return PTR_ERR(qf_inode); |
5289 | } | 5284 | } |
5290 | 5285 | ||
5291 | /* Don't account quota for quota files to avoid recursion */ | 5286 | /* Don't account quota for quota files to avoid recursion */ |
5292 | qf_inode->i_flags |= S_NOQUOTA; | 5287 | qf_inode->i_flags |= S_NOQUOTA; |
5293 | err = dquot_enable(qf_inode, type, format_id, flags); | 5288 | err = dquot_enable(qf_inode, type, format_id, flags); |
5294 | iput(qf_inode); | 5289 | iput(qf_inode); |
5295 | 5290 | ||
5296 | return err; | 5291 | return err; |
5297 | } | 5292 | } |
5298 | 5293 | ||
5299 | /* Enable usage tracking for all quota types. */ | 5294 | /* Enable usage tracking for all quota types. */ |
5300 | static int ext4_enable_quotas(struct super_block *sb) | 5295 | static int ext4_enable_quotas(struct super_block *sb) |
5301 | { | 5296 | { |
5302 | int type, err = 0; | 5297 | int type, err = 0; |
5303 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { | 5298 | unsigned long qf_inums[EXT4_MAXQUOTAS] = { |
5304 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), | 5299 | le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), |
5305 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) | 5300 | le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) |
5306 | }; | 5301 | }; |
5307 | 5302 | ||
5308 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; | 5303 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; |
5309 | for (type = 0; type < EXT4_MAXQUOTAS; type++) { | 5304 | for (type = 0; type < EXT4_MAXQUOTAS; type++) { |
5310 | if (qf_inums[type]) { | 5305 | if (qf_inums[type]) { |
5311 | err = ext4_quota_enable(sb, type, QFMT_VFS_V1, | 5306 | err = ext4_quota_enable(sb, type, QFMT_VFS_V1, |
5312 | DQUOT_USAGE_ENABLED); | 5307 | DQUOT_USAGE_ENABLED); |
5313 | if (err) { | 5308 | if (err) { |
5314 | ext4_warning(sb, | 5309 | ext4_warning(sb, |
5315 | "Failed to enable quota tracking " | 5310 | "Failed to enable quota tracking " |
5316 | "(type=%d, err=%d). Please run " | 5311 | "(type=%d, err=%d). Please run " |
5317 | "e2fsck to fix.", type, err); | 5312 | "e2fsck to fix.", type, err); |
5318 | return err; | 5313 | return err; |
5319 | } | 5314 | } |
5320 | } | 5315 | } |
5321 | } | 5316 | } |
5322 | return 0; | 5317 | return 0; |
5323 | } | 5318 | } |
5324 | 5319 | ||
5325 | static int ext4_quota_off(struct super_block *sb, int type) | 5320 | static int ext4_quota_off(struct super_block *sb, int type) |
5326 | { | 5321 | { |
5327 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5322 | struct inode *inode = sb_dqopt(sb)->files[type]; |
5328 | handle_t *handle; | 5323 | handle_t *handle; |
5329 | 5324 | ||
5330 | /* Force all delayed allocation blocks to be allocated. | 5325 | /* Force all delayed allocation blocks to be allocated. |
5331 | * Caller already holds s_umount sem */ | 5326 | * Caller already holds s_umount sem */ |
5332 | if (test_opt(sb, DELALLOC)) | 5327 | if (test_opt(sb, DELALLOC)) |
5333 | sync_filesystem(sb); | 5328 | sync_filesystem(sb); |
5334 | 5329 | ||
5335 | if (!inode) | 5330 | if (!inode) |
5336 | goto out; | 5331 | goto out; |
5337 | 5332 | ||
5338 | /* Update modification times of quota files when userspace can | 5333 | /* Update modification times of quota files when userspace can |
5339 | * start looking at them */ | 5334 | * start looking at them */ |
5340 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); | 5335 | handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); |
5341 | if (IS_ERR(handle)) | 5336 | if (IS_ERR(handle)) |
5342 | goto out; | 5337 | goto out; |
5343 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 5338 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
5344 | ext4_mark_inode_dirty(handle, inode); | 5339 | ext4_mark_inode_dirty(handle, inode); |
5345 | ext4_journal_stop(handle); | 5340 | ext4_journal_stop(handle); |
5346 | 5341 | ||
5347 | out: | 5342 | out: |
5348 | return dquot_quota_off(sb, type); | 5343 | return dquot_quota_off(sb, type); |
5349 | } | 5344 | } |
5350 | 5345 | ||
5351 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 5346 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
5352 | * to acquire the locks... As quota files are never truncated and the quota code | 5347 | * to acquire the locks... As quota files are never truncated and the quota code |
5353 | * itself serializes the operations (and no one else should touch the files) | 5348 | * itself serializes the operations (and no one else should touch the files) |
5354 | * we don't have to be afraid of races */ | 5349 | * we don't have to be afraid of races */ |
5355 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 5350 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
5356 | size_t len, loff_t off) | 5351 | size_t len, loff_t off) |
5357 | { | 5352 | { |
5358 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5353 | struct inode *inode = sb_dqopt(sb)->files[type]; |
5359 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 5354 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
5360 | int offset = off & (sb->s_blocksize - 1); | 5355 | int offset = off & (sb->s_blocksize - 1); |
5361 | int tocopy; | 5356 | int tocopy; |
5362 | size_t toread; | 5357 | size_t toread; |
5363 | struct buffer_head *bh; | 5358 | struct buffer_head *bh; |
5364 | loff_t i_size = i_size_read(inode); | 5359 | loff_t i_size = i_size_read(inode); |
5365 | 5360 | ||
5366 | if (off > i_size) | 5361 | if (off > i_size) |
5367 | return 0; | 5362 | return 0; |
5368 | if (off+len > i_size) | 5363 | if (off+len > i_size) |
5369 | len = i_size-off; | 5364 | len = i_size-off; |
5370 | toread = len; | 5365 | toread = len; |
5371 | while (toread > 0) { | 5366 | while (toread > 0) { |
5372 | tocopy = sb->s_blocksize - offset < toread ? | 5367 | tocopy = sb->s_blocksize - offset < toread ? |
5373 | sb->s_blocksize - offset : toread; | 5368 | sb->s_blocksize - offset : toread; |
5374 | bh = ext4_bread(NULL, inode, blk, 0); | 5369 | bh = ext4_bread(NULL, inode, blk, 0); |
5375 | if (IS_ERR(bh)) | 5370 | if (IS_ERR(bh)) |
5376 | return PTR_ERR(bh); | 5371 | return PTR_ERR(bh); |
5377 | if (!bh) /* A hole? */ | 5372 | if (!bh) /* A hole? */ |
5378 | memset(data, 0, tocopy); | 5373 | memset(data, 0, tocopy); |
5379 | else | 5374 | else |
5380 | memcpy(data, bh->b_data+offset, tocopy); | 5375 | memcpy(data, bh->b_data+offset, tocopy); |
5381 | brelse(bh); | 5376 | brelse(bh); |
5382 | offset = 0; | 5377 | offset = 0; |
5383 | toread -= tocopy; | 5378 | toread -= tocopy; |
5384 | data += tocopy; | 5379 | data += tocopy; |
5385 | blk++; | 5380 | blk++; |
5386 | } | 5381 | } |
5387 | return len; | 5382 | return len; |
5388 | } | 5383 | } |
5389 | 5384 | ||
5390 | /* Write to quotafile (we know the transaction is already started and has | 5385 | /* Write to quotafile (we know the transaction is already started and has |
5391 | * enough credits) */ | 5386 | * enough credits) */ |
5392 | static ssize_t ext4_quota_write(struct super_block *sb, int type, | 5387 | static ssize_t ext4_quota_write(struct super_block *sb, int type, |
5393 | const char *data, size_t len, loff_t off) | 5388 | const char *data, size_t len, loff_t off) |
5394 | { | 5389 | { |
5395 | struct inode *inode = sb_dqopt(sb)->files[type]; | 5390 | struct inode *inode = sb_dqopt(sb)->files[type]; |
5396 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 5391 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
5397 | int err, offset = off & (sb->s_blocksize - 1); | 5392 | int err, offset = off & (sb->s_blocksize - 1); |
5398 | struct buffer_head *bh; | 5393 | struct buffer_head *bh; |
5399 | handle_t *handle = journal_current_handle(); | 5394 | handle_t *handle = journal_current_handle(); |
5400 | 5395 | ||
5401 | if (EXT4_SB(sb)->s_journal && !handle) { | 5396 | if (EXT4_SB(sb)->s_journal && !handle) { |
5402 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" | 5397 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" |
5403 | " cancelled because transaction is not started", | 5398 | " cancelled because transaction is not started", |
5404 | (unsigned long long)off, (unsigned long long)len); | 5399 | (unsigned long long)off, (unsigned long long)len); |
5405 | return -EIO; | 5400 | return -EIO; |
5406 | } | 5401 | } |
5407 | /* | 5402 | /* |
5408 | * Since we account for only one data block in the transaction credits, | 5403 | * Since we account for only one data block in the transaction credits, |
5409 | * a write must not cross a block boundary. | 5404 | * a write must not cross a block boundary. |
5410 | */ | 5405 | */ |
5411 | if (sb->s_blocksize - offset < len) { | 5406 | if (sb->s_blocksize - offset < len) { |
5412 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" | 5407 | ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" |
5413 | " cancelled because not block aligned", | 5408 | " cancelled because not block aligned", |
5414 | (unsigned long long)off, (unsigned long long)len); | 5409 | (unsigned long long)off, (unsigned long long)len); |
5415 | return -EIO; | 5410 | return -EIO; |
5416 | } | 5411 | } |
5417 | 5412 | ||
5418 | bh = ext4_bread(handle, inode, blk, 1); | 5413 | bh = ext4_bread(handle, inode, blk, 1); |
5419 | if (IS_ERR(bh)) | 5414 | if (IS_ERR(bh)) |
5420 | return PTR_ERR(bh); | 5415 | return PTR_ERR(bh); |
5421 | if (!bh) | 5416 | if (!bh) |
5422 | goto out; | 5417 | goto out; |
5423 | BUFFER_TRACE(bh, "get write access"); | 5418 | BUFFER_TRACE(bh, "get write access"); |
5424 | err = ext4_journal_get_write_access(handle, bh); | 5419 | err = ext4_journal_get_write_access(handle, bh); |
5425 | if (err) { | 5420 | if (err) { |
5426 | brelse(bh); | 5421 | brelse(bh); |
5427 | return err; | 5422 | return err; |
5428 | } | 5423 | } |
5429 | lock_buffer(bh); | 5424 | lock_buffer(bh); |
5430 | memcpy(bh->b_data+offset, data, len); | 5425 | memcpy(bh->b_data+offset, data, len); |
5431 | flush_dcache_page(bh->b_page); | 5426 | flush_dcache_page(bh->b_page); |
5432 | unlock_buffer(bh); | 5427 | unlock_buffer(bh); |
5433 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | 5428 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
5434 | brelse(bh); | 5429 | brelse(bh); |
5435 | out: | 5430 | out: |
5436 | if (inode->i_size < off + len) { | 5431 | if (inode->i_size < off + len) { |
5437 | i_size_write(inode, off + len); | 5432 | i_size_write(inode, off + len); |
5438 | EXT4_I(inode)->i_disksize = inode->i_size; | 5433 | EXT4_I(inode)->i_disksize = inode->i_size; |
5439 | ext4_mark_inode_dirty(handle, inode); | 5434 | ext4_mark_inode_dirty(handle, inode); |
5440 | } | 5435 | } |
5441 | return len; | 5436 | return len; |
5442 | } | 5437 | } |
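
The alignment guard above is plain mask arithmetic: off & (s_blocksize - 1) is the byte offset within the block, and a write of len bytes fits in one block only when blocksize - offset >= len. Distilled into a hedged standalone helper (name hypothetical):

#include <stddef.h>

/* True iff [off, off + len) stays inside one block of size bsz,
 * where bsz is a power of two (as filesystem block sizes are). */
static int fits_in_one_block(unsigned long long off, size_t len,
			     unsigned int bsz)
{
	unsigned int offset = off & (bsz - 1);

	return bsz - offset >= len;
}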
5443 | 5438 | ||
5444 | #endif | 5439 | #endif |
5445 | 5440 | ||
5446 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | 5441 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
5447 | const char *dev_name, void *data) | 5442 | const char *dev_name, void *data) |
5448 | { | 5443 | { |
5449 | return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); | 5444 | return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); |
5450 | } | 5445 | } |
5451 | 5446 | ||
5452 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 5447 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
5453 | static inline void register_as_ext2(void) | 5448 | static inline void register_as_ext2(void) |
5454 | { | 5449 | { |
5455 | int err = register_filesystem(&ext2_fs_type); | 5450 | int err = register_filesystem(&ext2_fs_type); |
5456 | if (err) | 5451 | if (err) |
5457 | printk(KERN_WARNING | 5452 | printk(KERN_WARNING |
5458 | "EXT4-fs: Unable to register as ext2 (%d)\n", err); | 5453 | "EXT4-fs: Unable to register as ext2 (%d)\n", err); |
5459 | } | 5454 | } |
5460 | 5455 | ||
5461 | static inline void unregister_as_ext2(void) | 5456 | static inline void unregister_as_ext2(void) |
5462 | { | 5457 | { |
5463 | unregister_filesystem(&ext2_fs_type); | 5458 | unregister_filesystem(&ext2_fs_type); |
5464 | } | 5459 | } |
5465 | 5460 | ||
5466 | static inline int ext2_feature_set_ok(struct super_block *sb) | 5461 | static inline int ext2_feature_set_ok(struct super_block *sb) |
5467 | { | 5462 | { |
5468 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) | 5463 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) |
5469 | return 0; | 5464 | return 0; |
5470 | if (sb->s_flags & MS_RDONLY) | 5465 | if (sb->s_flags & MS_RDONLY) |
5471 | return 1; | 5466 | return 1; |
5472 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) | 5467 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) |
5473 | return 0; | 5468 | return 0; |
5474 | return 1; | 5469 | return 1; |
5475 | } | 5470 | } |
5476 | #else | 5471 | #else |
5477 | static inline void register_as_ext2(void) { } | 5472 | static inline void register_as_ext2(void) { } |
5478 | static inline void unregister_as_ext2(void) { } | 5473 | static inline void unregister_as_ext2(void) { } |
5479 | static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } | 5474 | static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } |
5480 | #endif | 5475 | #endif |
5481 | 5476 | ||
5482 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 5477 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
5483 | static inline void register_as_ext3(void) | 5478 | static inline void register_as_ext3(void) |
5484 | { | 5479 | { |
5485 | int err = register_filesystem(&ext3_fs_type); | 5480 | int err = register_filesystem(&ext3_fs_type); |
5486 | if (err) | 5481 | if (err) |
5487 | printk(KERN_WARNING | 5482 | printk(KERN_WARNING |
5488 | "EXT4-fs: Unable to register as ext3 (%d)\n", err); | 5483 | "EXT4-fs: Unable to register as ext3 (%d)\n", err); |
5489 | } | 5484 | } |
5490 | 5485 | ||
5491 | static inline void unregister_as_ext3(void) | 5486 | static inline void unregister_as_ext3(void) |
5492 | { | 5487 | { |
5493 | unregister_filesystem(&ext3_fs_type); | 5488 | unregister_filesystem(&ext3_fs_type); |
5494 | } | 5489 | } |
5495 | 5490 | ||
5496 | static inline int ext3_feature_set_ok(struct super_block *sb) | 5491 | static inline int ext3_feature_set_ok(struct super_block *sb) |
5497 | { | 5492 | { |
5498 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) | 5493 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) |
5499 | return 0; | 5494 | return 0; |
5500 | if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | 5495 | if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) |
5501 | return 0; | 5496 | return 0; |
5502 | if (sb->s_flags & MS_RDONLY) | 5497 | if (sb->s_flags & MS_RDONLY) |
5503 | return 1; | 5498 | return 1; |
5504 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) | 5499 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) |
5505 | return 0; | 5500 | return 0; |
5506 | return 1; | 5501 | return 1; |
5507 | } | 5502 | } |
5508 | #else | 5503 | #else |
5509 | static inline void register_as_ext3(void) { } | 5504 | static inline void register_as_ext3(void) { } |
5510 | static inline void unregister_as_ext3(void) { } | 5505 | static inline void unregister_as_ext3(void) { } |
5511 | static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } | 5506 | static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } |
5512 | #endif | 5507 | #endif |
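
Both ext2_feature_set_ok() and ext3_feature_set_ok() above use the same complement-mask idiom: querying with ~SUPPORTED asks whether any feature bit is set outside the supported set. A distilled, hedged sketch (names hypothetical):

/* A feature set is acceptable iff no bit falls outside 'supported'. */
static int feature_set_ok(unsigned int features, unsigned int supported)
{
	return (features & ~supported) == 0;
}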
5513 | 5508 | ||
5514 | static struct file_system_type ext4_fs_type = { | 5509 | static struct file_system_type ext4_fs_type = { |
5515 | .owner = THIS_MODULE, | 5510 | .owner = THIS_MODULE, |
5516 | .name = "ext4", | 5511 | .name = "ext4", |
5517 | .mount = ext4_mount, | 5512 | .mount = ext4_mount, |
5518 | .kill_sb = kill_block_super, | 5513 | .kill_sb = kill_block_super, |
5519 | .fs_flags = FS_REQUIRES_DEV, | 5514 | .fs_flags = FS_REQUIRES_DEV, |
5520 | }; | 5515 | }; |
5521 | MODULE_ALIAS_FS("ext4"); | 5516 | MODULE_ALIAS_FS("ext4"); |
5522 | 5517 | ||
5523 | static int __init ext4_init_feat_adverts(void) | 5518 | static int __init ext4_init_feat_adverts(void) |
5524 | { | 5519 | { |
5525 | struct ext4_features *ef; | 5520 | struct ext4_features *ef; |
5526 | int ret = -ENOMEM; | 5521 | int ret = -ENOMEM; |
5527 | 5522 | ||
5528 | ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); | 5523 | ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); |
5529 | if (!ef) | 5524 | if (!ef) |
5530 | goto out; | 5525 | goto out; |
5531 | 5526 | ||
5532 | ef->f_kobj.kset = ext4_kset; | 5527 | ef->f_kobj.kset = ext4_kset; |
5533 | init_completion(&ef->f_kobj_unregister); | 5528 | init_completion(&ef->f_kobj_unregister); |
5534 | ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, | 5529 | ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, |
5535 | "features"); | 5530 | "features"); |
5536 | if (ret) { | 5531 | if (ret) { |
5537 | kfree(ef); | 5532 | kfree(ef); |
5538 | goto out; | 5533 | goto out; |
5539 | } | 5534 | } |
5540 | 5535 | ||
5541 | ext4_feat = ef; | 5536 | ext4_feat = ef; |
5542 | ret = 0; | 5537 | ret = 0; |
5543 | out: | 5538 | out: |
5544 | return ret; | 5539 | return ret; |
5545 | } | 5540 | } |
5546 | 5541 | ||
5547 | static void ext4_exit_feat_adverts(void) | 5542 | static void ext4_exit_feat_adverts(void) |
5548 | { | 5543 | { |
5549 | kobject_put(&ext4_feat->f_kobj); | 5544 | kobject_put(&ext4_feat->f_kobj); |
5550 | wait_for_completion(&ext4_feat->f_kobj_unregister); | 5545 | wait_for_completion(&ext4_feat->f_kobj_unregister); |
5551 | kfree(ext4_feat); | 5546 | kfree(ext4_feat); |
5552 | } | 5547 | } |
5553 | 5548 | ||
5554 | /* Shared across all ext4 file systems */ | 5549 | /* Shared across all ext4 file systems */ |
5555 | wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | 5550 | wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; |
5556 | struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | 5551 | struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; |
5557 | 5552 | ||
5558 | static int __init ext4_init_fs(void) | 5553 | static int __init ext4_init_fs(void) |
5559 | { | 5554 | { |
5560 | int i, err; | 5555 | int i, err; |
5561 | 5556 | ||
5562 | ext4_li_info = NULL; | 5557 | ext4_li_info = NULL; |
5563 | mutex_init(&ext4_li_mtx); | 5558 | mutex_init(&ext4_li_mtx); |
5564 | 5559 | ||
5565 | /* Build-time check for flags consistency */ | 5560 | /* Build-time check for flags consistency */ |
5566 | ext4_check_flag_values(); | 5561 | ext4_check_flag_values(); |
5567 | 5562 | ||
5568 | for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { | 5563 | for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { |
5569 | mutex_init(&ext4__aio_mutex[i]); | 5564 | mutex_init(&ext4__aio_mutex[i]); |
5570 | init_waitqueue_head(&ext4__ioend_wq[i]); | 5565 | init_waitqueue_head(&ext4__ioend_wq[i]); |
5571 | } | 5566 | } |
5572 | 5567 | ||
5573 | err = ext4_init_es(); | 5568 | err = ext4_init_es(); |
5574 | if (err) | 5569 | if (err) |
5575 | return err; | 5570 | return err; |
5576 | 5571 | ||
5577 | err = ext4_init_pageio(); | 5572 | err = ext4_init_pageio(); |
5578 | if (err) | 5573 | if (err) |
5579 | goto out7; | 5574 | goto out7; |
5580 | 5575 | ||
5581 | err = ext4_init_system_zone(); | 5576 | err = ext4_init_system_zone(); |
5582 | if (err) | 5577 | if (err) |
5583 | goto out6; | 5578 | goto out6; |
5584 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 5579 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
5585 | if (!ext4_kset) { | 5580 | if (!ext4_kset) { |
5586 | err = -ENOMEM; | 5581 | err = -ENOMEM; |
5587 | goto out5; | 5582 | goto out5; |
5588 | } | 5583 | } |
5589 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 5584 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
5590 | 5585 | ||
5591 | err = ext4_init_feat_adverts(); | 5586 | err = ext4_init_feat_adverts(); |
5592 | if (err) | 5587 | if (err) |
5593 | goto out4; | 5588 | goto out4; |
5594 | 5589 | ||
5595 | err = ext4_init_mballoc(); | 5590 | err = ext4_init_mballoc(); |
5596 | if (err) | 5591 | if (err) |
5597 | goto out2; | 5592 | goto out2; |
5598 | else | 5593 | else |
5599 | ext4_mballoc_ready = 1; | 5594 | ext4_mballoc_ready = 1; |
5600 | err = init_inodecache(); | 5595 | err = init_inodecache(); |
5601 | if (err) | 5596 | if (err) |
5602 | goto out1; | 5597 | goto out1; |
5603 | register_as_ext3(); | 5598 | register_as_ext3(); |
5604 | register_as_ext2(); | 5599 | register_as_ext2(); |
5605 | err = register_filesystem(&ext4_fs_type); | 5600 | err = register_filesystem(&ext4_fs_type); |
5606 | if (err) | 5601 | if (err) |
5607 | goto out; | 5602 | goto out; |
5608 | 5603 | ||
5609 | return 0; | 5604 | return 0; |
5610 | out: | 5605 | out: |
5611 | unregister_as_ext2(); | 5606 | unregister_as_ext2(); |
5612 | unregister_as_ext3(); | 5607 | unregister_as_ext3(); |
5613 | destroy_inodecache(); | 5608 | destroy_inodecache(); |
5614 | out1: | 5609 | out1: |
5615 | ext4_mballoc_ready = 0; | 5610 | ext4_mballoc_ready = 0; |
5616 | ext4_exit_mballoc(); | 5611 | ext4_exit_mballoc(); |
5617 | out2: | 5612 | out2: |
5618 | ext4_exit_feat_adverts(); | 5613 | ext4_exit_feat_adverts(); |
5619 | out4: | 5614 | out4: |
5620 | if (ext4_proc_root) | 5615 | if (ext4_proc_root) |
5621 | remove_proc_entry("fs/ext4", NULL); | 5616 | remove_proc_entry("fs/ext4", NULL); |
5622 | kset_unregister(ext4_kset); | 5617 | kset_unregister(ext4_kset); |
5623 | out5: | 5618 | out5: |
5624 | ext4_exit_system_zone(); | 5619 | ext4_exit_system_zone(); |
5625 | out6: | 5620 | out6: |
5626 | ext4_exit_pageio(); | 5621 | ext4_exit_pageio(); |
5627 | out7: | 5622 | out7: |
5628 | ext4_exit_es(); | 5623 | ext4_exit_es(); |
5629 | 5624 | ||
5630 | return err; | 5625 | return err; |
5631 | } | 5626 | } |
5632 | 5627 | ||
5633 | static void __exit ext4_exit_fs(void) | 5628 | static void __exit ext4_exit_fs(void) |
5634 | { | 5629 | { |
5635 | ext4_destroy_lazyinit_thread(); | 5630 | ext4_destroy_lazyinit_thread(); |
5636 | unregister_as_ext2(); | 5631 | unregister_as_ext2(); |
5637 | unregister_as_ext3(); | 5632 | unregister_as_ext3(); |
5638 | unregister_filesystem(&ext4_fs_type); | 5633 | unregister_filesystem(&ext4_fs_type); |
5639 | destroy_inodecache(); | 5634 | destroy_inodecache(); |
5640 | ext4_exit_mballoc(); | 5635 | ext4_exit_mballoc(); |
5641 | ext4_exit_feat_adverts(); | 5636 | ext4_exit_feat_adverts(); |
5642 | remove_proc_entry("fs/ext4", NULL); | 5637 | remove_proc_entry("fs/ext4", NULL); |
5643 | kset_unregister(ext4_kset); | 5638 | kset_unregister(ext4_kset); |
5644 | ext4_exit_system_zone(); | 5639 | ext4_exit_system_zone(); |
5645 | ext4_exit_pageio(); | 5640 | ext4_exit_pageio(); |
5646 | ext4_exit_es(); | 5641 | ext4_exit_es(); |
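A minimal standalone sketch of the goto-unwind ladder ext4_init_fs() uses, with hypothetical init_a/init_b/init_c helpers standing in for the real ext4 subsystems: each step that can fail jumps to a label that tears down exactly the steps that already succeeded, in reverse order.

#include <stdio.h>

static int init_a(void) { return 0; }
static void exit_a(void) { puts("exit_a"); }
static int init_b(void) { return 0; }
static void exit_b(void) { puts("exit_b"); }
static int init_c(void) { return -12; }	/* simulate -ENOMEM */

static int init_fs(void)
{
	int err;

	err = init_a();
	if (err)
		return err;	/* nothing to unwind yet */
	err = init_b();
	if (err)
		goto out_a;
	err = init_c();
	if (err)
		goto out_b;
	return 0;

out_b:				/* labels mirror the calls, in reverse */
	exit_b();
out_a:
	exit_a();
	return err;
}

int main(void)
{
	printf("init_fs() = %d\n", init_fs());	/* prints -12 after exit_b, exit_a */
	return 0;
}

This is why the labels in ext4_init_fs() read bottom-up as a mirror of the calls at the top: adding or removing a subsystem touches one call and one label.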
fs/jbd2/recovery.c
1 | /* | 1 | /* |
2 | * linux/fs/jbd2/recovery.c | 2 | * linux/fs/jbd2/recovery.c |
3 | * | 3 | * |
4 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 | 4 | * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 |
5 | * | 5 | * |
6 | * Copyright 1999-2000 Red Hat Software --- All Rights Reserved | 6 | * Copyright 1999-2000 Red Hat Software --- All Rights Reserved |
7 | * | 7 | * |
8 | * This file is part of the Linux kernel and is made available under | 8 | * This file is part of the Linux kernel and is made available under |
9 | * the terms of the GNU General Public License, version 2, or at your | 9 | * the terms of the GNU General Public License, version 2, or at your |
10 | * option, any later version, incorporated herein by reference. | 10 | * option, any later version, incorporated herein by reference. |
11 | * | 11 | * |
12 | * Journal recovery routines for the generic filesystem journaling code; | 12 | * Journal recovery routines for the generic filesystem journaling code; |
13 | * part of the ext2fs journaling system. | 13 | * part of the ext2fs journaling system. |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #ifndef __KERNEL__ | 16 | #ifndef __KERNEL__ |
17 | #include "jfs_user.h" | 17 | #include "jfs_user.h" |
18 | #else | 18 | #else |
19 | #include <linux/time.h> | 19 | #include <linux/time.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/crc32.h> | 23 | #include <linux/crc32.h> |
24 | #include <linux/blkdev.h> | 24 | #include <linux/blkdev.h> |
25 | #endif | 25 | #endif |
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Maintain information about the progress of the recovery job, so that | 28 | * Maintain information about the progress of the recovery job, so that |
29 | * the different passes can carry information between them. | 29 | * the different passes can carry information between them. |
30 | */ | 30 | */ |
31 | struct recovery_info | 31 | struct recovery_info |
32 | { | 32 | { |
33 | tid_t start_transaction; | 33 | tid_t start_transaction; |
34 | tid_t end_transaction; | 34 | tid_t end_transaction; |
35 | 35 | ||
36 | int nr_replays; | 36 | int nr_replays; |
37 | int nr_revokes; | 37 | int nr_revokes; |
38 | int nr_revoke_hits; | 38 | int nr_revoke_hits; |
39 | }; | 39 | }; |
40 | 40 | ||
41 | enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; | 41 | enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; |
42 | static int do_one_pass(journal_t *journal, | 42 | static int do_one_pass(journal_t *journal, |
43 | struct recovery_info *info, enum passtype pass); | 43 | struct recovery_info *info, enum passtype pass); |
44 | static int scan_revoke_records(journal_t *, struct buffer_head *, | 44 | static int scan_revoke_records(journal_t *, struct buffer_head *, |
45 | tid_t, struct recovery_info *); | 45 | tid_t, struct recovery_info *); |
46 | 46 | ||
47 | #ifdef __KERNEL__ | 47 | #ifdef __KERNEL__ |
48 | 48 | ||
49 | /* Release readahead buffers after use */ | 49 | /* Release readahead buffers after use */ |
50 | static void journal_brelse_array(struct buffer_head *b[], int n) | 50 | static void journal_brelse_array(struct buffer_head *b[], int n) |
51 | { | 51 | { |
52 | while (--n >= 0) | 52 | while (--n >= 0) |
53 | brelse (b[n]); | 53 | brelse (b[n]); |
54 | } | 54 | } |
55 | 55 | ||
56 | 56 | ||
57 | /* | 57 | /* |
58 | * When reading from the journal, we are going through the block device | 58 | * When reading from the journal, we are going through the block device |
59 | * layer directly and so there is no readahead being done for us. We | 59 | * layer directly and so there is no readahead being done for us. We |
60 | * need to implement any readahead ourselves if we want it to happen at | 60 | * need to implement any readahead ourselves if we want it to happen at |
61 | * all. Recovery is basically one long sequential read, so make sure we | 61 | * all. Recovery is basically one long sequential read, so make sure we |
62 | * do the IO in reasonably large chunks. | 62 | * do the IO in reasonably large chunks. |
63 | * | 63 | * |
64 | * This is not so critical that we need to be enormously clever about | 64 | * This is not so critical that we need to be enormously clever about |
65 | * the readahead size, though. 128K is a purely arbitrary, good-enough | 65 | * the readahead size, though. 128K is a purely arbitrary, good-enough |
66 | * fixed value. | 66 | * fixed value. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | #define MAXBUF 8 | 69 | #define MAXBUF 8 |
70 | static int do_readahead(journal_t *journal, unsigned int start) | 70 | static int do_readahead(journal_t *journal, unsigned int start) |
71 | { | 71 | { |
72 | int err; | 72 | int err; |
73 | unsigned int max, nbufs, next; | 73 | unsigned int max, nbufs, next; |
74 | unsigned long long blocknr; | 74 | unsigned long long blocknr; |
75 | struct buffer_head *bh; | 75 | struct buffer_head *bh; |
76 | 76 | ||
77 | struct buffer_head * bufs[MAXBUF]; | 77 | struct buffer_head * bufs[MAXBUF]; |
78 | 78 | ||
79 | /* Do up to 128K of readahead */ | 79 | /* Do up to 128K of readahead */ |
80 | max = start + (128 * 1024 / journal->j_blocksize); | 80 | max = start + (128 * 1024 / journal->j_blocksize); |
81 | if (max > journal->j_maxlen) | 81 | if (max > journal->j_maxlen) |
82 | max = journal->j_maxlen; | 82 | max = journal->j_maxlen; |
83 | 83 | ||
84 | /* Do the readahead itself. We'll submit MAXBUF buffer_heads at | 84 | /* Do the readahead itself. We'll submit MAXBUF buffer_heads at |
85 | * a time to the block device IO layer. */ | 85 | * a time to the block device IO layer. */ |
86 | 86 | ||
87 | nbufs = 0; | 87 | nbufs = 0; |
88 | 88 | ||
89 | for (next = start; next < max; next++) { | 89 | for (next = start; next < max; next++) { |
90 | err = jbd2_journal_bmap(journal, next, &blocknr); | 90 | err = jbd2_journal_bmap(journal, next, &blocknr); |
91 | 91 | ||
92 | if (err) { | 92 | if (err) { |
93 | printk(KERN_ERR "JBD2: bad block at offset %u\n", | 93 | printk(KERN_ERR "JBD2: bad block at offset %u\n", |
94 | next); | 94 | next); |
95 | goto failed; | 95 | goto failed; |
96 | } | 96 | } |
97 | 97 | ||
98 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 98 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); |
99 | if (!bh) { | 99 | if (!bh) { |
100 | err = -ENOMEM; | 100 | err = -ENOMEM; |
101 | goto failed; | 101 | goto failed; |
102 | } | 102 | } |
103 | 103 | ||
104 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) { | 104 | if (!buffer_uptodate(bh) && !buffer_locked(bh)) { |
105 | bufs[nbufs++] = bh; | 105 | bufs[nbufs++] = bh; |
106 | if (nbufs == MAXBUF) { | 106 | if (nbufs == MAXBUF) { |
107 | ll_rw_block(READ, nbufs, bufs); | 107 | ll_rw_block(READ, nbufs, bufs); |
108 | journal_brelse_array(bufs, nbufs); | 108 | journal_brelse_array(bufs, nbufs); |
109 | nbufs = 0; | 109 | nbufs = 0; |
110 | } | 110 | } |
111 | } else | 111 | } else |
112 | brelse(bh); | 112 | brelse(bh); |
113 | } | 113 | } |
114 | 114 | ||
115 | if (nbufs) | 115 | if (nbufs) |
116 | ll_rw_block(READ, nbufs, bufs); | 116 | ll_rw_block(READ, nbufs, bufs); |
117 | err = 0; | 117 | err = 0; |
118 | 118 | ||
119 | failed: | 119 | failed: |
120 | if (nbufs) | 120 | if (nbufs) |
121 | journal_brelse_array(bufs, nbufs); | 121 | journal_brelse_array(bufs, nbufs); |
122 | return err; | 122 | return err; |
123 | } | 123 | } |
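To make the batching in do_readahead() concrete: the 128K window and MAXBUF of 8 mean a 1K-block journal reads ahead up to 128 blocks in 16 batches, while a 4K-block journal reads 32 blocks in 4 batches. A standalone check of that arithmetic (the block sizes are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int blocksizes[] = { 1024, 2048, 4096 };

	for (int i = 0; i < 3; i++) {
		unsigned int nblocks = 128 * 1024 / blocksizes[i];

		/* MAXBUF == 8 buffer_heads per ll_rw_block() batch */
		printf("blocksize %4u: %3u blocks, %2u batches\n",
		       blocksizes[i], nblocks, (nblocks + 7) / 8);
	}
	return 0;
}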
124 | 124 | ||
125 | #endif /* __KERNEL__ */ | 125 | #endif /* __KERNEL__ */ |
126 | 126 | ||
127 | 127 | ||
128 | /* | 128 | /* |
129 | * Read a block from the journal | 129 | * Read a block from the journal |
130 | */ | 130 | */ |
131 | 131 | ||
132 | static int jread(struct buffer_head **bhp, journal_t *journal, | 132 | static int jread(struct buffer_head **bhp, journal_t *journal, |
133 | unsigned int offset) | 133 | unsigned int offset) |
134 | { | 134 | { |
135 | int err; | 135 | int err; |
136 | unsigned long long blocknr; | 136 | unsigned long long blocknr; |
137 | struct buffer_head *bh; | 137 | struct buffer_head *bh; |
138 | 138 | ||
139 | *bhp = NULL; | 139 | *bhp = NULL; |
140 | 140 | ||
141 | if (offset >= journal->j_maxlen) { | 141 | if (offset >= journal->j_maxlen) { |
142 | printk(KERN_ERR "JBD2: corrupted journal superblock\n"); | 142 | printk(KERN_ERR "JBD2: corrupted journal superblock\n"); |
143 | return -EIO; | 143 | return -EIO; |
144 | } | 144 | } |
145 | 145 | ||
146 | err = jbd2_journal_bmap(journal, offset, &blocknr); | 146 | err = jbd2_journal_bmap(journal, offset, &blocknr); |
147 | 147 | ||
148 | if (err) { | 148 | if (err) { |
149 | printk(KERN_ERR "JBD2: bad block at offset %u\n", | 149 | printk(KERN_ERR "JBD2: bad block at offset %u\n", |
150 | offset); | 150 | offset); |
151 | return err; | 151 | return err; |
152 | } | 152 | } |
153 | 153 | ||
154 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); | 154 | bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); |
155 | if (!bh) | 155 | if (!bh) |
156 | return -ENOMEM; | 156 | return -ENOMEM; |
157 | 157 | ||
158 | if (!buffer_uptodate(bh)) { | 158 | if (!buffer_uptodate(bh)) { |
159 | /* If this is a brand new buffer, start readahead. | 159 | /* If this is a brand new buffer, start readahead. |
160 | Otherwise, we assume we are already reading it. */ | 160 | Otherwise, we assume we are already reading it. */ |
161 | if (!buffer_req(bh)) | 161 | if (!buffer_req(bh)) |
162 | do_readahead(journal, offset); | 162 | do_readahead(journal, offset); |
163 | wait_on_buffer(bh); | 163 | wait_on_buffer(bh); |
164 | } | 164 | } |
165 | 165 | ||
166 | if (!buffer_uptodate(bh)) { | 166 | if (!buffer_uptodate(bh)) { |
167 | printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", | 167 | printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", |
168 | offset); | 168 | offset); |
169 | brelse(bh); | 169 | brelse(bh); |
170 | return -EIO; | 170 | return -EIO; |
171 | } | 171 | } |
172 | 172 | ||
173 | *bhp = bh; | 173 | *bhp = bh; |
174 | return 0; | 174 | return 0; |
175 | } | 175 | } |
176 | 176 | ||
177 | static int jbd2_descr_block_csum_verify(journal_t *j, | 177 | static int jbd2_descr_block_csum_verify(journal_t *j, |
178 | void *buf) | 178 | void *buf) |
179 | { | 179 | { |
180 | struct jbd2_journal_block_tail *tail; | 180 | struct jbd2_journal_block_tail *tail; |
181 | __be32 provided; | 181 | __be32 provided; |
182 | __u32 calculated; | 182 | __u32 calculated; |
183 | 183 | ||
184 | if (!jbd2_journal_has_csum_v2or3(j)) | 184 | if (!jbd2_journal_has_csum_v2or3(j)) |
185 | return 1; | 185 | return 1; |
186 | 186 | ||
187 | tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - | 187 | tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - |
188 | sizeof(struct jbd2_journal_block_tail)); | 188 | sizeof(struct jbd2_journal_block_tail)); |
189 | provided = tail->t_checksum; | 189 | provided = tail->t_checksum; |
190 | tail->t_checksum = 0; | 190 | tail->t_checksum = 0; |
191 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 191 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
192 | tail->t_checksum = provided; | 192 | tail->t_checksum = provided; |
193 | 193 | ||
194 | return provided == cpu_to_be32(calculated); | 194 | return provided == cpu_to_be32(calculated); |
195 | } | 195 | } |
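A userspace model of the save/zero/recompute/restore pattern used by jbd2_descr_block_csum_verify(). zlib's crc32() stands in for jbd2_chksum() (the kernel actually uses crc32c with a per-journal seed), and a raw 4-byte big-endian tail stands in for struct jbd2_journal_block_tail:

#include <string.h>
#include <zlib.h>

/* Verify a block whose last 4 bytes hold a big-endian CRC of the whole
 * block, computed with that tail field zeroed. */
static int tail_csum_ok(unsigned char *buf, size_t blocksize, uLong seed)
{
	unsigned char *tail = buf + blocksize - 4;
	unsigned char saved[4];
	uLong calc;

	memcpy(saved, tail, 4);			/* provided = tail->t_checksum */
	memset(tail, 0, 4);			/* tail->t_checksum = 0        */
	calc = crc32(seed, buf, (uInt)blocksize);
	memcpy(tail, saved, 4);			/* tail->t_checksum = provided */

	return saved[0] == ((calc >> 24) & 0xff) &&
	       saved[1] == ((calc >> 16) & 0xff) &&
	       saved[2] == ((calc >>  8) & 0xff) &&
	       saved[3] == ( calc        & 0xff);
}

int main(void)
{
	unsigned char blk[512] = "payload";	/* tail bytes start out zero */
	uLong seed = crc32(0L, Z_NULL, 0);
	uLong c = crc32(seed, blk, sizeof(blk));

	blk[508] = c >> 24; blk[509] = c >> 16;	/* store big-endian tail */
	blk[510] = c >> 8;  blk[511] = c;

	return tail_csum_ok(blk, sizeof(blk), seed) ? 0 : 1;
}

Restoring the on-disk value after verification matters because the same buffer may be re-read or re-verified later.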
196 | 196 | ||
197 | /* | 197 | /* |
198 | * Count the number of in-use tags in a journal descriptor block. | 198 | * Count the number of in-use tags in a journal descriptor block. |
199 | */ | 199 | */ |
200 | 200 | ||
201 | static int count_tags(journal_t *journal, struct buffer_head *bh) | 201 | static int count_tags(journal_t *journal, struct buffer_head *bh) |
202 | { | 202 | { |
203 | char * tagp; | 203 | char * tagp; |
204 | journal_block_tag_t * tag; | 204 | journal_block_tag_t * tag; |
205 | int nr = 0, size = journal->j_blocksize; | 205 | int nr = 0, size = journal->j_blocksize; |
206 | int tag_bytes = journal_tag_bytes(journal); | 206 | int tag_bytes = journal_tag_bytes(journal); |
207 | 207 | ||
208 | if (jbd2_journal_has_csum_v2or3(journal)) | 208 | if (jbd2_journal_has_csum_v2or3(journal)) |
209 | size -= sizeof(struct jbd2_journal_block_tail); | 209 | size -= sizeof(struct jbd2_journal_block_tail); |
210 | 210 | ||
211 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 211 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
212 | 212 | ||
213 | while ((tagp - bh->b_data + tag_bytes) <= size) { | 213 | while ((tagp - bh->b_data + tag_bytes) <= size) { |
214 | tag = (journal_block_tag_t *) tagp; | 214 | tag = (journal_block_tag_t *) tagp; |
215 | 215 | ||
216 | nr++; | 216 | nr++; |
217 | tagp += tag_bytes; | 217 | tagp += tag_bytes; |
218 | if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) | 218 | if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) |
219 | tagp += 16; | 219 | tagp += 16; |
220 | 220 | ||
221 | if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) | 221 | if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) |
222 | break; | 222 | break; |
223 | } | 223 | } |
224 | 224 | ||
225 | return nr; | 225 | return nr; |
226 | } | 226 | } |
227 | 227 | ||
228 | 228 | ||
229 | /* Make sure we wrap around the log correctly! */ | 229 | /* Make sure we wrap around the log correctly! */ |
230 | #define wrap(journal, var) \ | 230 | #define wrap(journal, var) \ |
231 | do { \ | 231 | do { \ |
232 | if (var >= (journal)->j_last) \ | 232 | if (var >= (journal)->j_last) \ |
233 | var -= ((journal)->j_last - (journal)->j_first); \ | 233 | var -= ((journal)->j_last - (journal)->j_first); \ |
234 | } while (0) | 234 | } while (0) |
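A worked example of the wrap() arithmetic with an illustrative journal of j_first = 1 and j_last = 1024: stepping past the last log block lands back on the first usable one, so the log is treated as a ring.

#include <stdio.h>

#define J_FIRST	1UL
#define J_LAST	1024UL

/* Same arithmetic as the wrap() macro above */
#define wrap(var)						\
	do {							\
		if ((var) >= J_LAST)				\
			(var) -= (J_LAST - J_FIRST);		\
	} while (0)

int main(void)
{
	unsigned long next = 1023;	/* last valid log block */

	next++;			/* step past the end of the log... */
	wrap(next);		/* ...and land on j_first again     */
	printf("%lu\n", next);	/* prints 1                        */
	return 0;
}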
235 | 235 | ||
236 | /** | 236 | /** |
236 | * jbd2_journal_recover - recovers an on-disk journal | 236 | * jbd2_journal_recover - recovers an on-disk journal |
238 | * @journal: the journal to recover | 238 | * @journal: the journal to recover |
239 | * | 239 | * |
240 | * The primary function for recovering the log contents when mounting a | 240 | * The primary function for recovering the log contents when mounting a |
241 | * journaled device. | 241 | * journaled device. |
242 | * | 242 | * |
243 | * Recovery is done in three passes. In the first pass, we look for the | 243 | * Recovery is done in three passes. In the first pass, we look for the |
244 | * end of the log. In the second, we assemble the list of revoke | 244 | * end of the log. In the second, we assemble the list of revoke |
245 | * blocks. In the third and final pass, we replay any un-revoked blocks | 245 | * blocks. In the third and final pass, we replay any un-revoked blocks |
246 | * in the log. | 246 | * in the log. |
247 | */ | 247 | */ |
248 | int jbd2_journal_recover(journal_t *journal) | 248 | int jbd2_journal_recover(journal_t *journal) |
249 | { | 249 | { |
250 | int err, err2; | 250 | int err, err2; |
251 | journal_superblock_t * sb; | 251 | journal_superblock_t * sb; |
252 | 252 | ||
253 | struct recovery_info info; | 253 | struct recovery_info info; |
254 | 254 | ||
255 | memset(&info, 0, sizeof(info)); | 255 | memset(&info, 0, sizeof(info)); |
256 | sb = journal->j_superblock; | 256 | sb = journal->j_superblock; |
257 | 257 | ||
258 | /* | 258 | /* |
259 | * The journal superblock's s_start field (the current log head) | 259 | * The journal superblock's s_start field (the current log head) |
260 | * is always zero if, and only if, the journal was cleanly | 260 | * is always zero if, and only if, the journal was cleanly |
261 | * unmounted. | 261 | * unmounted. |
262 | */ | 262 | */ |
263 | 263 | ||
264 | if (!sb->s_start) { | 264 | if (!sb->s_start) { |
265 | jbd_debug(1, "No recovery required, last transaction %d\n", | 265 | jbd_debug(1, "No recovery required, last transaction %d\n", |
266 | be32_to_cpu(sb->s_sequence)); | 266 | be32_to_cpu(sb->s_sequence)); |
267 | journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; | 267 | journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; |
268 | return 0; | 268 | return 0; |
269 | } | 269 | } |
270 | 270 | ||
271 | err = do_one_pass(journal, &info, PASS_SCAN); | 271 | err = do_one_pass(journal, &info, PASS_SCAN); |
272 | if (!err) | 272 | if (!err) |
273 | err = do_one_pass(journal, &info, PASS_REVOKE); | 273 | err = do_one_pass(journal, &info, PASS_REVOKE); |
274 | if (!err) | 274 | if (!err) |
275 | err = do_one_pass(journal, &info, PASS_REPLAY); | 275 | err = do_one_pass(journal, &info, PASS_REPLAY); |
276 | 276 | ||
277 | jbd_debug(1, "JBD2: recovery, exit status %d, " | 277 | jbd_debug(1, "JBD2: recovery, exit status %d, " |
278 | "recovered transactions %u to %u\n", | 278 | "recovered transactions %u to %u\n", |
279 | err, info.start_transaction, info.end_transaction); | 279 | err, info.start_transaction, info.end_transaction); |
280 | jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", | 280 | jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", |
281 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); | 281 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); |
282 | 282 | ||
283 | /* Restart the log at the next transaction ID, thus invalidating | 283 | /* Restart the log at the next transaction ID, thus invalidating |
284 | * any existing commit records in the log. */ | 284 | * any existing commit records in the log. */ |
285 | journal->j_transaction_sequence = ++info.end_transaction; | 285 | journal->j_transaction_sequence = ++info.end_transaction; |
286 | 286 | ||
287 | jbd2_journal_clear_revoke(journal); | 287 | jbd2_journal_clear_revoke(journal); |
288 | err2 = sync_blockdev(journal->j_fs_dev); | 288 | err2 = sync_blockdev(journal->j_fs_dev); |
289 | if (!err) | 289 | if (!err) |
290 | err = err2; | 290 | err = err2; |
291 | /* Make sure all replayed data is on permanent storage */ | 291 | /* Make sure all replayed data is on permanent storage */ |
292 | if (journal->j_flags & JBD2_BARRIER) { | 292 | if (journal->j_flags & JBD2_BARRIER) { |
293 | err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 293 | err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
294 | if (!err) | 294 | if (!err) |
295 | err = err2; | 295 | err = err2; |
296 | } | 296 | } |
297 | return err; | 297 | return err; |
298 | } | 298 | } |
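A compressed standalone model of the control flow above, with a stub standing in for do_one_pass(): each pass is gated on the previous one succeeding, and only PASS_REPLAY writes anything back to the filesystem.

#include <stdio.h>

enum passtype { PASS_SCAN, PASS_REVOKE, PASS_REPLAY };

/* Stub standing in for do_one_pass(); a real pass walks the log. */
static int one_pass(enum passtype pass)
{
	static const char *name[] = { "scan", "revoke", "replay" };

	printf("running pass: %s\n", name[pass]);
	return 0;		/* 0 == success, as in the kernel code */
}

int main(void)
{
	int err;

	err = one_pass(PASS_SCAN);		/* find the end of the log  */
	if (!err)
		err = one_pass(PASS_REVOKE);	/* build the revoke table   */
	if (!err)
		err = one_pass(PASS_REPLAY);	/* replay un-revoked blocks */
	return err ? 1 : 0;
}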
299 | 299 | ||
300 | /** | 300 | /** |
301 | * jbd2_journal_skip_recovery - Start journal and wipe existing records | 301 | * jbd2_journal_skip_recovery - Start journal and wipe existing records |
302 | * @journal: journal to startup | 302 | * @journal: journal to startup |
303 | * | 303 | * |
304 | * Locate any valid recovery information from the journal and set up the | 304 | * Locate any valid recovery information from the journal and set up the |
305 | * journal structures in memory to ignore it (presumably because the | 305 | * journal structures in memory to ignore it (presumably because the |
306 | * caller has evidence that it is out of date). | 306 | * caller has evidence that it is out of date). |
307 | * This function doesn't appear to be exported. | 307 | * This function doesn't appear to be exported. |
308 | * | 308 | * |
309 | * We perform one pass over the journal to allow us to tell the user how | 309 | * We perform one pass over the journal to allow us to tell the user how |
310 | * much recovery information is being erased, and to let us initialise | 310 | * much recovery information is being erased, and to let us initialise |
311 | * the journal transaction sequence numbers to the next unused ID. | 311 | * the journal transaction sequence numbers to the next unused ID. |
312 | */ | 312 | */ |
313 | int jbd2_journal_skip_recovery(journal_t *journal) | 313 | int jbd2_journal_skip_recovery(journal_t *journal) |
314 | { | 314 | { |
315 | int err; | 315 | int err; |
316 | 316 | ||
317 | struct recovery_info info; | 317 | struct recovery_info info; |
318 | 318 | ||
319 | memset (&info, 0, sizeof(info)); | 319 | memset (&info, 0, sizeof(info)); |
320 | 320 | ||
321 | err = do_one_pass(journal, &info, PASS_SCAN); | 321 | err = do_one_pass(journal, &info, PASS_SCAN); |
322 | 322 | ||
323 | if (err) { | 323 | if (err) { |
324 | printk(KERN_ERR "JBD2: error %d scanning journal\n", err); | 324 | printk(KERN_ERR "JBD2: error %d scanning journal\n", err); |
325 | ++journal->j_transaction_sequence; | 325 | ++journal->j_transaction_sequence; |
326 | } else { | 326 | } else { |
327 | #ifdef CONFIG_JBD2_DEBUG | 327 | #ifdef CONFIG_JBD2_DEBUG |
328 | int dropped = info.end_transaction - | 328 | int dropped = info.end_transaction - |
329 | be32_to_cpu(journal->j_superblock->s_sequence); | 329 | be32_to_cpu(journal->j_superblock->s_sequence); |
330 | jbd_debug(1, | 330 | jbd_debug(1, |
331 | "JBD2: ignoring %d transaction%s from the journal.\n", | 331 | "JBD2: ignoring %d transaction%s from the journal.\n", |
332 | dropped, (dropped == 1) ? "" : "s"); | 332 | dropped, (dropped == 1) ? "" : "s"); |
333 | #endif | 333 | #endif |
334 | journal->j_transaction_sequence = ++info.end_transaction; | 334 | journal->j_transaction_sequence = ++info.end_transaction; |
335 | } | 335 | } |
336 | 336 | ||
337 | journal->j_tail = 0; | 337 | journal->j_tail = 0; |
338 | return err; | 338 | return err; |
339 | } | 339 | } |
340 | 340 | ||
341 | static inline unsigned long long read_tag_block(journal_t *journal, | 341 | static inline unsigned long long read_tag_block(journal_t *journal, |
342 | journal_block_tag_t *tag) | 342 | journal_block_tag_t *tag) |
343 | { | 343 | { |
344 | unsigned long long block = be32_to_cpu(tag->t_blocknr); | 344 | unsigned long long block = be32_to_cpu(tag->t_blocknr); |
345 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) | 345 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
346 | block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; | 346 | block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; |
347 | return block; | 347 | return block; |
348 | } | 348 | } |
349 | 349 | ||
350 | /* | 350 | /* |
351 | * calc_chksums calculates the checksums for the blocks described in the | 351 | * calc_chksums calculates the checksums for the blocks described in the |
352 | * descriptor block. | 352 | * descriptor block. |
353 | */ | 353 | */ |
354 | static int calc_chksums(journal_t *journal, struct buffer_head *bh, | 354 | static int calc_chksums(journal_t *journal, struct buffer_head *bh, |
355 | unsigned long *next_log_block, __u32 *crc32_sum) | 355 | unsigned long *next_log_block, __u32 *crc32_sum) |
356 | { | 356 | { |
357 | int i, num_blks, err; | 357 | int i, num_blks, err; |
358 | unsigned long io_block; | 358 | unsigned long io_block; |
359 | struct buffer_head *obh; | 359 | struct buffer_head *obh; |
360 | 360 | ||
361 | num_blks = count_tags(journal, bh); | 361 | num_blks = count_tags(journal, bh); |
362 | /* Calculate checksum of the descriptor block. */ | 362 | /* Calculate checksum of the descriptor block. */ |
363 | *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); | 363 | *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); |
364 | 364 | ||
365 | for (i = 0; i < num_blks; i++) { | 365 | for (i = 0; i < num_blks; i++) { |
366 | io_block = (*next_log_block)++; | 366 | io_block = (*next_log_block)++; |
367 | wrap(journal, *next_log_block); | 367 | wrap(journal, *next_log_block); |
368 | err = jread(&obh, journal, io_block); | 368 | err = jread(&obh, journal, io_block); |
369 | if (err) { | 369 | if (err) { |
370 | printk(KERN_ERR "JBD2: IO error %d recovering block " | 370 | printk(KERN_ERR "JBD2: IO error %d recovering block " |
371 | "%lu in log\n", err, io_block); | 371 | "%lu in log\n", err, io_block); |
372 | return 1; | 372 | return 1; |
373 | } else { | 373 | } else { |
374 | *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, | 374 | *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, |
375 | obh->b_size); | 375 | obh->b_size); |
376 | } | 376 | } |
377 | put_bh(obh); | 377 | put_bh(obh); |
378 | } | 378 | } |
379 | return 0; | 379 | return 0; |
380 | } | 380 | } |
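The property calc_chksums() relies on is that a CRC can be folded over a run of buffers one at a time and equal the CRC of their concatenation. A userspace demonstration with zlib's crc32() (the kernel code uses crc32_be with an ~0 initial value; the split point here is arbitrary):

#include <assert.h>
#include <zlib.h>

int main(void)
{
	const unsigned char log[] = "descriptor-then-data-blocks";
	uInt len = sizeof(log) - 1;

	/* One-shot CRC of the whole range... */
	uLong whole = crc32(crc32(0L, Z_NULL, 0), log, len);

	/* ...equals the block-at-a-time accumulation calc_chksums()
	 * performs across the descriptor block and its data blocks. */
	uLong acc = crc32(0L, Z_NULL, 0);
	acc = crc32(acc, log, 10);
	acc = crc32(acc, log + 10, len - 10);

	assert(whole == acc);
	return 0;
}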
381 | 381 | ||
382 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | 382 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) |
383 | { | 383 | { |
384 | struct commit_header *h; | 384 | struct commit_header *h; |
385 | __be32 provided; | 385 | __be32 provided; |
386 | __u32 calculated; | 386 | __u32 calculated; |
387 | 387 | ||
388 | if (!jbd2_journal_has_csum_v2or3(j)) | 388 | if (!jbd2_journal_has_csum_v2or3(j)) |
389 | return 1; | 389 | return 1; |
390 | 390 | ||
391 | h = buf; | 391 | h = buf; |
392 | provided = h->h_chksum[0]; | 392 | provided = h->h_chksum[0]; |
393 | h->h_chksum[0] = 0; | 393 | h->h_chksum[0] = 0; |
394 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 394 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
395 | h->h_chksum[0] = provided; | 395 | h->h_chksum[0] = provided; |
396 | 396 | ||
397 | return provided == cpu_to_be32(calculated); | 397 | return provided == cpu_to_be32(calculated); |
398 | } | 398 | } |
399 | 399 | ||
400 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | 400 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
401 | void *buf, __u32 sequence) | 401 | void *buf, __u32 sequence) |
402 | { | 402 | { |
403 | journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; | 403 | journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; |
404 | __u32 csum32; | 404 | __u32 csum32; |
405 | __be32 seq; | 405 | __be32 seq; |
406 | 406 | ||
407 | if (!jbd2_journal_has_csum_v2or3(j)) | 407 | if (!jbd2_journal_has_csum_v2or3(j)) |
408 | return 1; | 408 | return 1; |
409 | 409 | ||
410 | seq = cpu_to_be32(sequence); | 410 | seq = cpu_to_be32(sequence); |
411 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); | 411 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); |
412 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); | 412 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); |
413 | 413 | ||
414 | if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) | 414 | if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) |
415 | return tag3->t_checksum == cpu_to_be32(csum32); | 415 | return tag3->t_checksum == cpu_to_be32(csum32); |
416 | else | 416 | else |
417 | return tag->t_checksum == cpu_to_be16(csum32); | 417 | return tag->t_checksum == cpu_to_be16(csum32); |
418 | } | 418 | } |
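The tag checksum above folds the commit sequence number into the CRC before the block data, so identical data replayed under a different transaction ID produces a different checksum. A userspace illustration of that chaining, again with zlib's crc32() standing in for jbd2_chksum():

#include <arpa/inet.h>	/* htonl(), i.e. a cpu_to_be32() equivalent */
#include <assert.h>
#include <stdint.h>
#include <zlib.h>

static uLong tag_csum(uLong seed, uint32_t sequence,
		      const unsigned char *blk, uInt len)
{
	uint32_t seq = htonl(sequence);	/* big-endian, as on disk */
	uLong c = crc32(seed, (const unsigned char *)&seq, sizeof(seq));

	return crc32(c, blk, len);	/* then fold in the block data */
}

int main(void)
{
	const unsigned char blk[16] = "same-block-data";
	uLong seed = crc32(0L, Z_NULL, 0);

	/* Same data, different transaction => different checksum. */
	assert(tag_csum(seed, 41, blk, sizeof(blk)) !=
	       tag_csum(seed, 42, blk, sizeof(blk)));
	return 0;
}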
419 | 419 | ||
420 | static int do_one_pass(journal_t *journal, | 420 | static int do_one_pass(journal_t *journal, |
421 | struct recovery_info *info, enum passtype pass) | 421 | struct recovery_info *info, enum passtype pass) |
422 | { | 422 | { |
423 | unsigned int first_commit_ID, next_commit_ID; | 423 | unsigned int first_commit_ID, next_commit_ID; |
424 | unsigned long next_log_block; | 424 | unsigned long next_log_block; |
425 | int err, success = 0; | 425 | int err, success = 0; |
426 | journal_superblock_t * sb; | 426 | journal_superblock_t * sb; |
427 | journal_header_t * tmp; | 427 | journal_header_t * tmp; |
428 | struct buffer_head * bh; | 428 | struct buffer_head * bh; |
429 | unsigned int sequence; | 429 | unsigned int sequence; |
430 | int blocktype; | 430 | int blocktype; |
431 | int tag_bytes = journal_tag_bytes(journal); | 431 | int tag_bytes = journal_tag_bytes(journal); |
432 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | 432 | __u32 crc32_sum = ~0; /* Transactional Checksums */ |
433 | int descr_csum_size = 0; | 433 | int descr_csum_size = 0; |
434 | int block_error = 0; | 434 | int block_error = 0; |
435 | 435 | ||
436 | /* | 436 | /* |
437 | * First thing is to establish what we expect to find in the log | 437 | * First thing is to establish what we expect to find in the log |
438 | * (in terms of transaction IDs), and where (in terms of log | 438 | * (in terms of transaction IDs), and where (in terms of log |
439 | * block offsets): query the superblock. | 439 | * block offsets): query the superblock. |
440 | */ | 440 | */ |
441 | 441 | ||
442 | sb = journal->j_superblock; | 442 | sb = journal->j_superblock; |
443 | next_commit_ID = be32_to_cpu(sb->s_sequence); | 443 | next_commit_ID = be32_to_cpu(sb->s_sequence); |
444 | next_log_block = be32_to_cpu(sb->s_start); | 444 | next_log_block = be32_to_cpu(sb->s_start); |
445 | 445 | ||
446 | first_commit_ID = next_commit_ID; | 446 | first_commit_ID = next_commit_ID; |
447 | if (pass == PASS_SCAN) | 447 | if (pass == PASS_SCAN) |
448 | info->start_transaction = first_commit_ID; | 448 | info->start_transaction = first_commit_ID; |
449 | 449 | ||
450 | jbd_debug(1, "Starting recovery pass %d\n", pass); | 450 | jbd_debug(1, "Starting recovery pass %d\n", pass); |
451 | 451 | ||
452 | /* | 452 | /* |
453 | * Now we walk through the log, transaction by transaction, | 453 | * Now we walk through the log, transaction by transaction, |
454 | * making sure that each transaction has a commit block in the | 454 | * making sure that each transaction has a commit block in the |
455 | * expected place. Each complete transaction gets replayed back | 455 | * expected place. Each complete transaction gets replayed back |
456 | * into the main filesystem. | 456 | * into the main filesystem. |
457 | */ | 457 | */ |
458 | 458 | ||
459 | while (1) { | 459 | while (1) { |
460 | int flags; | 460 | int flags; |
461 | char * tagp; | 461 | char * tagp; |
462 | journal_block_tag_t * tag; | 462 | journal_block_tag_t * tag; |
463 | struct buffer_head * obh; | 463 | struct buffer_head * obh; |
464 | struct buffer_head * nbh; | 464 | struct buffer_head * nbh; |
465 | 465 | ||
466 | cond_resched(); | 466 | cond_resched(); |
467 | 467 | ||
468 | /* If we already know where to stop the log traversal, | 468 | /* If we already know where to stop the log traversal, |
469 | * check right now that we haven't gone past the end of | 469 | * check right now that we haven't gone past the end of |
470 | * the log. */ | 470 | * the log. */ |
471 | 471 | ||
472 | if (pass != PASS_SCAN) | 472 | if (pass != PASS_SCAN) |
473 | if (tid_geq(next_commit_ID, info->end_transaction)) | 473 | if (tid_geq(next_commit_ID, info->end_transaction)) |
474 | break; | 474 | break; |
475 | 475 | ||
476 | jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", | 476 | jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", |
477 | next_commit_ID, next_log_block, journal->j_last); | 477 | next_commit_ID, next_log_block, journal->j_last); |
478 | 478 | ||
479 | /* Skip over each chunk of the transaction looking for | 479 | /* Skip over each chunk of the transaction looking for |
480 | * either the next descriptor block or the final commit | 480 | * either the next descriptor block or the final commit |
481 | * record. */ | 481 | * record. */ |
482 | 482 | ||
483 | jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); | 483 | jbd_debug(3, "JBD2: checking block %ld\n", next_log_block); |
484 | err = jread(&bh, journal, next_log_block); | 484 | err = jread(&bh, journal, next_log_block); |
485 | if (err) | 485 | if (err) |
486 | goto failed; | 486 | goto failed; |
487 | 487 | ||
488 | next_log_block++; | 488 | next_log_block++; |
489 | wrap(journal, next_log_block); | 489 | wrap(journal, next_log_block); |
490 | 490 | ||
491 | /* What kind of buffer is it? | 491 | /* What kind of buffer is it? |
492 | * | 492 | * |
493 | * If it is a descriptor block, check that it has the | 493 | * If it is a descriptor block, check that it has the |
494 | * expected sequence number. Otherwise, we're all done | 494 | * expected sequence number. Otherwise, we're all done |
495 | * here. */ | 495 | * here. */ |
496 | 496 | ||
497 | tmp = (journal_header_t *)bh->b_data; | 497 | tmp = (journal_header_t *)bh->b_data; |
498 | 498 | ||
499 | if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { | 499 | if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { |
500 | brelse(bh); | 500 | brelse(bh); |
501 | break; | 501 | break; |
502 | } | 502 | } |
503 | 503 | ||
504 | blocktype = be32_to_cpu(tmp->h_blocktype); | 504 | blocktype = be32_to_cpu(tmp->h_blocktype); |
505 | sequence = be32_to_cpu(tmp->h_sequence); | 505 | sequence = be32_to_cpu(tmp->h_sequence); |
506 | jbd_debug(3, "Found magic %d, sequence %d\n", | 506 | jbd_debug(3, "Found magic %d, sequence %d\n", |
507 | blocktype, sequence); | 507 | blocktype, sequence); |
508 | 508 | ||
509 | if (sequence != next_commit_ID) { | 509 | if (sequence != next_commit_ID) { |
510 | brelse(bh); | 510 | brelse(bh); |
511 | break; | 511 | break; |
512 | } | 512 | } |
513 | 513 | ||
514 | /* OK, we have a valid descriptor block which matches | 514 | /* OK, we have a valid descriptor block which matches |
515 | * all of the sequence number checks. What are we going | 515 | * all of the sequence number checks. What are we going |
516 | * to do with it? That depends on the pass... */ | 516 | * to do with it? That depends on the pass... */ |
517 | 517 | ||
518 | switch(blocktype) { | 518 | switch(blocktype) { |
519 | case JBD2_DESCRIPTOR_BLOCK: | 519 | case JBD2_DESCRIPTOR_BLOCK: |
520 | /* Verify checksum first */ | 520 | /* Verify checksum first */ |
521 | if (jbd2_journal_has_csum_v2or3(journal)) | 521 | if (jbd2_journal_has_csum_v2or3(journal)) |
522 | descr_csum_size = | 522 | descr_csum_size = |
523 | sizeof(struct jbd2_journal_block_tail); | 523 | sizeof(struct jbd2_journal_block_tail); |
524 | if (descr_csum_size > 0 && | 524 | if (descr_csum_size > 0 && |
525 | !jbd2_descr_block_csum_verify(journal, | 525 | !jbd2_descr_block_csum_verify(journal, |
526 | bh->b_data)) { | 526 | bh->b_data)) { |
| | 527 | printk(KERN_ERR "JBD2: Invalid checksum " |
| | 528 | "recovering block %lu in log\n", |
| | 529 | next_log_block); |
527 | err = -EIO; | 530 | err = -EIO; |
528 | brelse(bh); | 531 | brelse(bh); |
529 | goto failed; | 532 | goto failed; |
530 | } | 533 | } |
531 | 534 | ||
532 | /* If it is a valid descriptor block, replay it | 535 | /* If it is a valid descriptor block, replay it |
533 | * in pass REPLAY; if journal_checksums enabled, then | 536 | * in pass REPLAY; if journal_checksums enabled, then |
534 | * calculate checksums in PASS_SCAN, otherwise, | 537 | * calculate checksums in PASS_SCAN, otherwise, |
535 | * just skip over the blocks it describes. */ | 538 | * just skip over the blocks it describes. */ |
536 | if (pass != PASS_REPLAY) { | 539 | if (pass != PASS_REPLAY) { |
537 | if (pass == PASS_SCAN && | 540 | if (pass == PASS_SCAN && |
538 | JBD2_HAS_COMPAT_FEATURE(journal, | 541 | JBD2_HAS_COMPAT_FEATURE(journal, |
539 | JBD2_FEATURE_COMPAT_CHECKSUM) && | 542 | JBD2_FEATURE_COMPAT_CHECKSUM) && |
540 | !info->end_transaction) { | 543 | !info->end_transaction) { |
541 | if (calc_chksums(journal, bh, | 544 | if (calc_chksums(journal, bh, |
542 | &next_log_block, | 545 | &next_log_block, |
543 | &crc32_sum)) { | 546 | &crc32_sum)) { |
544 | put_bh(bh); | 547 | put_bh(bh); |
545 | break; | 548 | break; |
546 | } | 549 | } |
547 | put_bh(bh); | 550 | put_bh(bh); |
548 | continue; | 551 | continue; |
549 | } | 552 | } |
550 | next_log_block += count_tags(journal, bh); | 553 | next_log_block += count_tags(journal, bh); |
551 | wrap(journal, next_log_block); | 554 | wrap(journal, next_log_block); |
552 | put_bh(bh); | 555 | put_bh(bh); |
553 | continue; | 556 | continue; |
554 | } | 557 | } |
555 | 558 | ||
556 | /* A descriptor block: we can now write all of | 559 | /* A descriptor block: we can now write all of |
557 | * the data blocks. Yay, useful work is finally | 560 | * the data blocks. Yay, useful work is finally |
558 | * getting done here! */ | 561 | * getting done here! */ |
559 | 562 | ||
560 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 563 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
561 | while ((tagp - bh->b_data + tag_bytes) | 564 | while ((tagp - bh->b_data + tag_bytes) |
562 | <= journal->j_blocksize - descr_csum_size) { | 565 | <= journal->j_blocksize - descr_csum_size) { |
563 | unsigned long io_block; | 566 | unsigned long io_block; |
564 | 567 | ||
565 | tag = (journal_block_tag_t *) tagp; | 568 | tag = (journal_block_tag_t *) tagp; |
566 | flags = be16_to_cpu(tag->t_flags); | 569 | flags = be16_to_cpu(tag->t_flags); |
567 | 570 | ||
568 | io_block = next_log_block++; | 571 | io_block = next_log_block++; |
569 | wrap(journal, next_log_block); | 572 | wrap(journal, next_log_block); |
570 | err = jread(&obh, journal, io_block); | 573 | err = jread(&obh, journal, io_block); |
571 | if (err) { | 574 | if (err) { |
572 | /* Recover what we can, but | 575 | /* Recover what we can, but |
573 | * report failure at the end. */ | 576 | * report failure at the end. */ |
574 | success = err; | 577 | success = err; |
575 | printk(KERN_ERR | 578 | printk(KERN_ERR |
576 | "JBD2: IO error %d recovering " | 579 | "JBD2: IO error %d recovering " |
577 | "block %ld in log\n", | 580 | "block %ld in log\n", |
578 | err, io_block); | 581 | err, io_block); |
579 | } else { | 582 | } else { |
580 | unsigned long long blocknr; | 583 | unsigned long long blocknr; |
581 | 584 | ||
582 | J_ASSERT(obh != NULL); | 585 | J_ASSERT(obh != NULL); |
583 | blocknr = read_tag_block(journal, | 586 | blocknr = read_tag_block(journal, |
584 | tag); | 587 | tag); |
585 | 588 | ||
586 | /* If the block has been | 589 | /* If the block has been |
587 | * revoked, then we're all done | 590 | * revoked, then we're all done |
588 | * here. */ | 591 | * here. */ |
589 | if (jbd2_journal_test_revoke | 592 | if (jbd2_journal_test_revoke |
590 | (journal, blocknr, | 593 | (journal, blocknr, |
591 | next_commit_ID)) { | 594 | next_commit_ID)) { |
592 | brelse(obh); | 595 | brelse(obh); |
593 | ++info->nr_revoke_hits; | 596 | ++info->nr_revoke_hits; |
594 | goto skip_write; | 597 | goto skip_write; |
595 | } | 598 | } |
596 | 599 | ||
597 | /* Look for block corruption */ | 600 | /* Look for block corruption */ |
598 | if (!jbd2_block_tag_csum_verify( | 601 | if (!jbd2_block_tag_csum_verify( |
599 | journal, tag, obh->b_data, | 602 | journal, tag, obh->b_data, |
600 | be32_to_cpu(tmp->h_sequence))) { | 603 | be32_to_cpu(tmp->h_sequence))) { |
601 | brelse(obh); | 604 | brelse(obh); |
602 | success = -EIO; | 605 | success = -EIO; |
603 | printk(KERN_ERR "JBD2: Invalid " | 606 | printk(KERN_ERR "JBD2: Invalid " |
604 | "checksum recovering " | 607 | "checksum recovering " |
605 | "block %llu in log\n", | 608 | "block %llu in log\n", |
606 | blocknr); | 609 | blocknr); |
607 | block_error = 1; | 610 | block_error = 1; |
608 | goto skip_write; | 611 | goto skip_write; |
609 | } | 612 | } |
610 | 613 | ||
611 | /* Find a buffer for the new | 614 | /* Find a buffer for the new |
612 | * data being restored */ | 615 | * data being restored */ |
613 | nbh = __getblk(journal->j_fs_dev, | 616 | nbh = __getblk(journal->j_fs_dev, |
614 | blocknr, | 617 | blocknr, |
615 | journal->j_blocksize); | 618 | journal->j_blocksize); |
616 | if (nbh == NULL) { | 619 | if (nbh == NULL) { |
617 | printk(KERN_ERR | 620 | printk(KERN_ERR |
618 | "JBD2: Out of memory " | 621 | "JBD2: Out of memory " |
619 | "during recovery.\n"); | 622 | "during recovery.\n"); |
620 | err = -ENOMEM; | 623 | err = -ENOMEM; |
621 | brelse(bh); | 624 | brelse(bh); |
622 | brelse(obh); | 625 | brelse(obh); |
623 | goto failed; | 626 | goto failed; |
624 | } | 627 | } |
625 | 628 | ||
626 | lock_buffer(nbh); | 629 | lock_buffer(nbh); |
627 | memcpy(nbh->b_data, obh->b_data, | 630 | memcpy(nbh->b_data, obh->b_data, |
628 | journal->j_blocksize); | 631 | journal->j_blocksize); |
629 | if (flags & JBD2_FLAG_ESCAPE) { | 632 | if (flags & JBD2_FLAG_ESCAPE) { |
630 | *((__be32 *)nbh->b_data) = | 633 | *((__be32 *)nbh->b_data) = |
631 | cpu_to_be32(JBD2_MAGIC_NUMBER); | 634 | cpu_to_be32(JBD2_MAGIC_NUMBER); |
632 | } | 635 | } |
633 | 636 | ||
634 | BUFFER_TRACE(nbh, "marking dirty"); | 637 | BUFFER_TRACE(nbh, "marking dirty"); |
635 | set_buffer_uptodate(nbh); | 638 | set_buffer_uptodate(nbh); |
636 | mark_buffer_dirty(nbh); | 639 | mark_buffer_dirty(nbh); |
637 | BUFFER_TRACE(nbh, "marking uptodate"); | 640 | BUFFER_TRACE(nbh, "marking uptodate"); |
638 | ++info->nr_replays; | 641 | ++info->nr_replays; |
639 | /* ll_rw_block(WRITE, 1, &nbh); */ | 642 | /* ll_rw_block(WRITE, 1, &nbh); */ |
640 | unlock_buffer(nbh); | 643 | unlock_buffer(nbh); |
641 | brelse(obh); | 644 | brelse(obh); |
642 | brelse(nbh); | 645 | brelse(nbh); |
643 | } | 646 | } |
644 | 647 | ||
645 | skip_write: | 648 | skip_write: |
646 | tagp += tag_bytes; | 649 | tagp += tag_bytes; |
647 | if (!(flags & JBD2_FLAG_SAME_UUID)) | 650 | if (!(flags & JBD2_FLAG_SAME_UUID)) |
648 | tagp += 16; | 651 | tagp += 16; |
649 | 652 | ||
650 | if (flags & JBD2_FLAG_LAST_TAG) | 653 | if (flags & JBD2_FLAG_LAST_TAG) |
651 | break; | 654 | break; |
652 | } | 655 | } |
653 | 656 | ||
654 | brelse(bh); | 657 | brelse(bh); |
655 | continue; | 658 | continue; |
656 | 659 | ||
657 | case JBD2_COMMIT_BLOCK: | 660 | case JBD2_COMMIT_BLOCK: |
658 | /* How to differentiate between interrupted commit | 661 | /* How to differentiate between interrupted commit |
659 | * and journal corruption? | 662 | * and journal corruption? |
660 | * | 663 | * |
661 | * {nth transaction} | 664 | * {nth transaction} |
662 | * Checksum Verification Failed | 665 | * Checksum Verification Failed |
663 | * | | 666 | * | |
664 | * ____________________ | 667 | * ____________________ |
665 | * | | | 668 | * | | |
666 | * async_commit sync_commit | 669 | * async_commit sync_commit |
667 | * | | | 670 | * | | |
668 | * | GO TO NEXT "Journal Corruption" | 671 | * | GO TO NEXT "Journal Corruption" |
669 | * | TRANSACTION | 672 | * | TRANSACTION |
670 | * | | 673 | * | |
671 | * {(n+1)th transaction} | 674 | * {(n+1)th transaction} |
672 | * | | 675 | * | |
673 | * _______|______________ | 676 | * _______|______________ |
674 | * | | | 677 | * | | |
675 | * Commit block found Commit block not found | 678 | * Commit block found Commit block not found |
676 | * | | | 679 | * | | |
677 | * "Journal Corruption" | | 680 | * "Journal Corruption" | |
678 | * _____________|_________ | 681 | * _____________|_________ |
679 | * | | | 682 | * | | |
680 | * nth trans corrupt OR nth trans | 683 | * nth trans corrupt OR nth trans |
681 | * and (n+1)th interrupted interrupted | 684 | * and (n+1)th interrupted interrupted |
682 | * before commit block | 685 | * before commit block |
683 | * could reach the disk. | 686 | * could reach the disk. |
684 | * (We cannot distinguish between the above | 687 | * (We cannot distinguish between the above |
685 | * conditions, hence assume | 688 | * conditions, hence assume |
686 | * "Interrupted Commit".) | 689 | * "Interrupted Commit".) |
687 | */ | 690 | */ |
688 | 691 | ||
689 | /* Found an expected commit block: if checksums | 692 | /* Found an expected commit block: if checksums |
690 | * are present, verify them in PASS_SCAN; else not | 693 | * are present, verify them in PASS_SCAN; else not |
691 | * much to do other than move on to the next sequence | 694 | * much to do other than move on to the next sequence |
692 | * number. */ | 695 | * number. */ |
693 | if (pass == PASS_SCAN && | 696 | if (pass == PASS_SCAN && |
694 | JBD2_HAS_COMPAT_FEATURE(journal, | 697 | JBD2_HAS_COMPAT_FEATURE(journal, |
695 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | 698 | JBD2_FEATURE_COMPAT_CHECKSUM)) { |
696 | int chksum_err, chksum_seen; | 699 | int chksum_err, chksum_seen; |
697 | struct commit_header *cbh = | 700 | struct commit_header *cbh = |
698 | (struct commit_header *)bh->b_data; | 701 | (struct commit_header *)bh->b_data; |
699 | unsigned found_chksum = | 702 | unsigned found_chksum = |
700 | be32_to_cpu(cbh->h_chksum[0]); | 703 | be32_to_cpu(cbh->h_chksum[0]); |
701 | 704 | ||
702 | chksum_err = chksum_seen = 0; | 705 | chksum_err = chksum_seen = 0; |
703 | 706 | ||
704 | if (info->end_transaction) { | 707 | if (info->end_transaction) { |
705 | journal->j_failed_commit = | 708 | journal->j_failed_commit = |
706 | info->end_transaction; | 709 | info->end_transaction; |
707 | brelse(bh); | 710 | brelse(bh); |
708 | break; | 711 | break; |
709 | } | 712 | } |
710 | 713 | ||
711 | if (crc32_sum == found_chksum && | 714 | if (crc32_sum == found_chksum && |
712 | cbh->h_chksum_type == JBD2_CRC32_CHKSUM && | 715 | cbh->h_chksum_type == JBD2_CRC32_CHKSUM && |
713 | cbh->h_chksum_size == | 716 | cbh->h_chksum_size == |
714 | JBD2_CRC32_CHKSUM_SIZE) | 717 | JBD2_CRC32_CHKSUM_SIZE) |
715 | chksum_seen = 1; | 718 | chksum_seen = 1; |
716 | else if (!(cbh->h_chksum_type == 0 && | 719 | else if (!(cbh->h_chksum_type == 0 && |
717 | cbh->h_chksum_size == 0 && | 720 | cbh->h_chksum_size == 0 && |
718 | found_chksum == 0 && | 721 | found_chksum == 0 && |
719 | !chksum_seen)) | 722 | !chksum_seen)) |
720 | /* | 723 | /* |
721 | * If the fs is mounted using an old kernel and then | 724 | * If the fs is mounted using an old kernel and then |
722 | * a kernel with journal_checksum is used, then we | 725 | * a kernel with journal_checksum is used, then we |
723 | * get a situation where the journal flag has the | 726 | * get a situation where the journal flag has the |
724 | * checksum flag set but checksums are not | 727 | * checksum flag set but checksums are not |
725 | * present, i.e. chksum = 0, in the individual | 728 | * present, i.e. chksum = 0, in the individual |
726 | * commit blocks. | 729 | * commit blocks. |
727 | * Hence, to avoid checksum failures in this | 730 | * Hence, to avoid checksum failures in this |
728 | * situation, this extra check is added. | 731 | * situation, this extra check is added. |
729 | */ | 732 | */ |
730 | chksum_err = 1; | 733 | chksum_err = 1; |
731 | 734 | ||
732 | if (chksum_err) { | 735 | if (chksum_err) { |
733 | info->end_transaction = next_commit_ID; | 736 | info->end_transaction = next_commit_ID; |
734 | 737 | ||
735 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 738 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
736 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ | 739 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ |
737 | journal->j_failed_commit = | 740 | journal->j_failed_commit = |
738 | next_commit_ID; | 741 | next_commit_ID; |
739 | brelse(bh); | 742 | brelse(bh); |
740 | break; | 743 | break; |
741 | } | 744 | } |
742 | } | 745 | } |
743 | crc32_sum = ~0; | 746 | crc32_sum = ~0; |
744 | } | 747 | } |
745 | if (pass == PASS_SCAN && | 748 | if (pass == PASS_SCAN && |
746 | !jbd2_commit_block_csum_verify(journal, | 749 | !jbd2_commit_block_csum_verify(journal, |
747 | bh->b_data)) { | 750 | bh->b_data)) { |
748 | info->end_transaction = next_commit_ID; | 751 | info->end_transaction = next_commit_ID; |
749 | 752 | ||
750 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 753 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
751 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 754 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
752 | journal->j_failed_commit = | 755 | journal->j_failed_commit = |
753 | next_commit_ID; | 756 | next_commit_ID; |
754 | brelse(bh); | 757 | brelse(bh); |
755 | break; | 758 | break; |
756 | } | 759 | } |
757 | } | 760 | } |
758 | brelse(bh); | 761 | brelse(bh); |
759 | next_commit_ID++; | 762 | next_commit_ID++; |
760 | continue; | 763 | continue; |
761 | 764 | ||
762 | case JBD2_REVOKE_BLOCK: | 765 | case JBD2_REVOKE_BLOCK: |
763 | /* If we aren't in the REVOKE pass, then we can | 766 | /* If we aren't in the REVOKE pass, then we can |
764 | * just skip over this block. */ | 767 | * just skip over this block. */ |
765 | if (pass != PASS_REVOKE) { | 768 | if (pass != PASS_REVOKE) { |
766 | brelse(bh); | 769 | brelse(bh); |
767 | continue; | 770 | continue; |
768 | } | 771 | } |
769 | 772 | ||
770 | err = scan_revoke_records(journal, bh, | 773 | err = scan_revoke_records(journal, bh, |
771 | next_commit_ID, info); | 774 | next_commit_ID, info); |
772 | brelse(bh); | 775 | brelse(bh); |
773 | if (err) | 776 | if (err) |
774 | goto failed; | 777 | goto failed; |
775 | continue; | 778 | continue; |
776 | 779 | ||
777 | default: | 780 | default: |
778 | jbd_debug(3, "Unrecognised magic %d, end of scan.\n", | 781 | jbd_debug(3, "Unrecognised magic %d, end of scan.\n", |
779 | blocktype); | 782 | blocktype); |
780 | brelse(bh); | 783 | brelse(bh); |
781 | goto done; | 784 | goto done; |
782 | } | 785 | } |
783 | } | 786 | } |
784 | 787 | ||
785 | done: | 788 | done: |
786 | /* | 789 | /* |
787 | * We broke out of the log scan loop: either we came to the | 790 | * We broke out of the log scan loop: either we came to the |
788 | * known end of the log or we found an unexpected block in the | 791 | * known end of the log or we found an unexpected block in the |
789 | * log. If the latter happened, then we know that the "current" | 792 | * log. If the latter happened, then we know that the "current" |
790 | * transaction marks the end of the valid log. | 793 | * transaction marks the end of the valid log. |
791 | */ | 794 | */ |
792 | 795 | ||
793 | if (pass == PASS_SCAN) { | 796 | if (pass == PASS_SCAN) { |
794 | if (!info->end_transaction) | 797 | if (!info->end_transaction) |
795 | info->end_transaction = next_commit_ID; | 798 | info->end_transaction = next_commit_ID; |
796 | } else { | 799 | } else { |
797 | /* It's really bad news if different passes end up at | 800 | /* It's really bad news if different passes end up at |
798 | * different places (but possible due to IO errors). */ | 801 | * different places (but possible due to IO errors). */ |
799 | if (info->end_transaction != next_commit_ID) { | 802 | if (info->end_transaction != next_commit_ID) { |
800 | printk(KERN_ERR "JBD2: recovery pass %d ended at " | 803 | printk(KERN_ERR "JBD2: recovery pass %d ended at " |
801 | "transaction %u, expected %u\n", | 804 | "transaction %u, expected %u\n", |
802 | pass, next_commit_ID, info->end_transaction); | 805 | pass, next_commit_ID, info->end_transaction); |
803 | if (!success) | 806 | if (!success) |
804 | success = -EIO; | 807 | success = -EIO; |
805 | } | 808 | } |
806 | } | 809 | } |
807 | if (block_error && success == 0) | 810 | if (block_error && success == 0) |
808 | success = -EIO; | 811 | success = -EIO; |
809 | return success; | 812 | return success; |
810 | 813 | ||
811 | failed: | 814 | failed: |
812 | return err; | 815 | return err; |
813 | } | 816 | } |
814 | 817 | ||
815 | static int jbd2_revoke_block_csum_verify(journal_t *j, | 818 | static int jbd2_revoke_block_csum_verify(journal_t *j, |
816 | void *buf) | 819 | void *buf) |
817 | { | 820 | { |
818 | struct jbd2_journal_revoke_tail *tail; | 821 | struct jbd2_journal_revoke_tail *tail; |
819 | __be32 provided; | 822 | __be32 provided; |
820 | __u32 calculated; | 823 | __u32 calculated; |
821 | 824 | ||
822 | if (!jbd2_journal_has_csum_v2or3(j)) | 825 | if (!jbd2_journal_has_csum_v2or3(j)) |
823 | return 1; | 826 | return 1; |
824 | 827 | ||
825 | tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - | 828 | tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - |
826 | sizeof(struct jbd2_journal_revoke_tail)); | 829 | sizeof(struct jbd2_journal_revoke_tail)); |
827 | provided = tail->r_checksum; | 830 | provided = tail->r_checksum; |
828 | tail->r_checksum = 0; | 831 | tail->r_checksum = 0; |
829 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | 832 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); |
830 | tail->r_checksum = provided; | 833 | tail->r_checksum = provided; |
831 | 834 | ||
832 | return provided == cpu_to_be32(calculated); | 835 | return provided == cpu_to_be32(calculated); |
833 | } | 836 | } |
834 | 837 | ||
835 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ | 838 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ |
836 | 839 | ||
837 | static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, | 840 | static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, |
838 | tid_t sequence, struct recovery_info *info) | 841 | tid_t sequence, struct recovery_info *info) |
839 | { | 842 | { |
840 | jbd2_journal_revoke_header_t *header; | 843 | jbd2_journal_revoke_header_t *header; |
841 | int offset, max; | 844 | int offset, max; |
842 | int record_len = 4; | 845 | int record_len = 4; |
843 | 846 | ||
844 | header = (jbd2_journal_revoke_header_t *) bh->b_data; | 847 | header = (jbd2_journal_revoke_header_t *) bh->b_data; |
845 | offset = sizeof(jbd2_journal_revoke_header_t); | 848 | offset = sizeof(jbd2_journal_revoke_header_t); |
846 | max = be32_to_cpu(header->r_count); | 849 | max = be32_to_cpu(header->r_count); |
847 | 850 | ||
848 | if (!jbd2_revoke_block_csum_verify(journal, header)) | 851 | if (!jbd2_revoke_block_csum_verify(journal, header)) |
849 | return -EINVAL; | 852 | return -EINVAL; |
850 | 853 | ||
851 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) | 854 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
852 | record_len = 8; | 855 | record_len = 8; |
853 | 856 | ||
854 | while (offset + record_len <= max) { | 857 | while (offset + record_len <= max) { |
855 | unsigned long long blocknr; | 858 | unsigned long long blocknr; |
856 | int err; | 859 | int err; |
857 | 860 | ||
858 | if (record_len == 4) | 861 | if (record_len == 4) |
859 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); | 862 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); |
860 | else | 863 | else |
861 | blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); | 864 | blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); |
862 | offset += record_len; | 865 | offset += record_len; |
863 | err = jbd2_journal_set_revoke(journal, blocknr, sequence); | 866 | err = jbd2_journal_set_revoke(journal, blocknr, sequence); |
864 | if (err) | 867 | if (err) |
865 | return err; | 868 | return err; |
866 | ++info->nr_revokes; | 869 | ++info->nr_revokes; |
867 | } | 870 | } |
868 | return 0; | 871 | return 0; |
869 | } | 872 | } |
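A toy parser for the record layout scan_revoke_records() walks: r_count gives the number of bytes used in the block (header included), followed by big-endian block numbers of record_len 4 (or 8 with JBD2_FEATURE_INCOMPAT_64BIT). The bare 4-byte count here is a simplification of jbd2_journal_revoke_header_t, and the checksum tail is omitted:

#include <stdio.h>
#include <stdint.h>

static uint32_t be32(const unsigned char *p)	/* be32_to_cpu() equivalent */
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8  | (uint32_t)p[3];
}

int main(void)
{
	const unsigned char blk[] = {
		0, 0, 0, 12,	/* r_count: 12 bytes used in total */
		0, 0, 0, 7,	/* revoked block 7   */
		0, 0, 1, 0,	/* revoked block 256 */
	};
	int record_len = 4;	/* 8 with JBD2_FEATURE_INCOMPAT_64BIT */
	int offset = 4;		/* sizeof the (simplified) header     */
	int max = (int)be32(blk);

	while (offset + record_len <= max) {
		printf("revoked block %u\n", be32(blk + offset));
		offset += record_len;
	}
	return 0;
}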
870 | 873 |