Commit 562c72aa57c36b178eacc3500a0215651eca9429
Committed by
Al Viro
1 parent
11b80f459a
Exists in
master
and in
4 other branches
fs: move inode_dio_wait calls into ->setattr
Let filesystems handle waiting for direct I/O requests themselves instead of doing it beforehand. This means filesystem-specific locks to prevent new dio references from appearing can be held. This is important to allow generalizing i_dio_count to non-DIO_LOCKING filesystems. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Showing 12 changed files with 24 additions and 3 deletions Inline Diff
fs/attr.c
1 | /* | 1 | /* |
2 | * linux/fs/attr.c | 2 | * linux/fs/attr.c |
3 | * | 3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds | 4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | * changes by Thomas Schoebel-Theuer | 5 | * changes by Thomas Schoebel-Theuer |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/time.h> | 9 | #include <linux/time.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/capability.h> | 12 | #include <linux/capability.h> |
13 | #include <linux/fsnotify.h> | 13 | #include <linux/fsnotify.h> |
14 | #include <linux/fcntl.h> | 14 | #include <linux/fcntl.h> |
15 | #include <linux/security.h> | 15 | #include <linux/security.h> |
16 | 16 | ||
17 | /** | 17 | /** |
18 | * inode_change_ok - check if attribute changes to an inode are allowed | 18 | * inode_change_ok - check if attribute changes to an inode are allowed |
19 | * @inode: inode to check | 19 | * @inode: inode to check |
20 | * @attr: attributes to change | 20 | * @attr: attributes to change |
21 | * | 21 | * |
22 | * Check if we are allowed to change the attributes contained in @attr | 22 | * Check if we are allowed to change the attributes contained in @attr |
23 | * in the given inode. This includes the normal unix access permission | 23 | * in the given inode. This includes the normal unix access permission |
24 | * checks, as well as checks for rlimits and others. | 24 | * checks, as well as checks for rlimits and others. |
25 | * | 25 | * |
26 | * Should be called as the first thing in ->setattr implementations, | 26 | * Should be called as the first thing in ->setattr implementations, |
27 | * possibly after taking additional locks. | 27 | * possibly after taking additional locks. |
28 | */ | 28 | */ |
29 | int inode_change_ok(const struct inode *inode, struct iattr *attr) | 29 | int inode_change_ok(const struct inode *inode, struct iattr *attr) |
30 | { | 30 | { |
31 | unsigned int ia_valid = attr->ia_valid; | 31 | unsigned int ia_valid = attr->ia_valid; |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * First check size constraints. These can't be overridden using | 34 | * First check size constraints. These can't be overridden using |
35 | * ATTR_FORCE. | 35 | * ATTR_FORCE. |
36 | */ | 36 | */ |
37 | if (ia_valid & ATTR_SIZE) { | 37 | if (ia_valid & ATTR_SIZE) { |
38 | int error = inode_newsize_ok(inode, attr->ia_size); | 38 | int error = inode_newsize_ok(inode, attr->ia_size); |
39 | if (error) | 39 | if (error) |
40 | return error; | 40 | return error; |
41 | } | 41 | } |
42 | 42 | ||
43 | /* If force is set do it anyway. */ | 43 | /* If force is set do it anyway. */ |
44 | if (ia_valid & ATTR_FORCE) | 44 | if (ia_valid & ATTR_FORCE) |
45 | return 0; | 45 | return 0; |
46 | 46 | ||
47 | /* Make sure a caller can chown. */ | 47 | /* Make sure a caller can chown. */ |
48 | if ((ia_valid & ATTR_UID) && | 48 | if ((ia_valid & ATTR_UID) && |
49 | (current_fsuid() != inode->i_uid || | 49 | (current_fsuid() != inode->i_uid || |
50 | attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) | 50 | attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) |
51 | return -EPERM; | 51 | return -EPERM; |
52 | 52 | ||
53 | /* Make sure caller can chgrp. */ | 53 | /* Make sure caller can chgrp. */ |
54 | if ((ia_valid & ATTR_GID) && | 54 | if ((ia_valid & ATTR_GID) && |
55 | (current_fsuid() != inode->i_uid || | 55 | (current_fsuid() != inode->i_uid || |
56 | (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && | 56 | (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && |
57 | !capable(CAP_CHOWN)) | 57 | !capable(CAP_CHOWN)) |
58 | return -EPERM; | 58 | return -EPERM; |
59 | 59 | ||
60 | /* Make sure a caller can chmod. */ | 60 | /* Make sure a caller can chmod. */ |
61 | if (ia_valid & ATTR_MODE) { | 61 | if (ia_valid & ATTR_MODE) { |
62 | if (!inode_owner_or_capable(inode)) | 62 | if (!inode_owner_or_capable(inode)) |
63 | return -EPERM; | 63 | return -EPERM; |
64 | /* Also check the setgid bit! */ | 64 | /* Also check the setgid bit! */ |
65 | if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : | 65 | if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : |
66 | inode->i_gid) && !capable(CAP_FSETID)) | 66 | inode->i_gid) && !capable(CAP_FSETID)) |
67 | attr->ia_mode &= ~S_ISGID; | 67 | attr->ia_mode &= ~S_ISGID; |
68 | } | 68 | } |
69 | 69 | ||
70 | /* Check for setting the inode time. */ | 70 | /* Check for setting the inode time. */ |
71 | if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { | 71 | if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { |
72 | if (!inode_owner_or_capable(inode)) | 72 | if (!inode_owner_or_capable(inode)) |
73 | return -EPERM; | 73 | return -EPERM; |
74 | } | 74 | } |
75 | 75 | ||
76 | return 0; | 76 | return 0; |
77 | } | 77 | } |
78 | EXPORT_SYMBOL(inode_change_ok); | 78 | EXPORT_SYMBOL(inode_change_ok); |
79 | 79 | ||
80 | /** | 80 | /** |
81 | * inode_newsize_ok - may this inode be truncated to a given size | 81 | * inode_newsize_ok - may this inode be truncated to a given size |
82 | * @inode: the inode to be truncated | 82 | * @inode: the inode to be truncated |
83 | * @offset: the new size to assign to the inode | 83 | * @offset: the new size to assign to the inode |
84 | * @Returns: 0 on success, -ve errno on failure | 84 | * @Returns: 0 on success, -ve errno on failure |
85 | * | 85 | * |
86 | * inode_newsize_ok must be called with i_mutex held. | 86 | * inode_newsize_ok must be called with i_mutex held. |
87 | * | 87 | * |
88 | * inode_newsize_ok will check filesystem limits and ulimits to check that the | 88 | * inode_newsize_ok will check filesystem limits and ulimits to check that the |
89 | * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ | 89 | * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ |
90 | * when necessary. Caller must not proceed with inode size change if failure is | 90 | * when necessary. Caller must not proceed with inode size change if failure is |
91 | * returned. @inode must be a file (not directory), with appropriate | 91 | * returned. @inode must be a file (not directory), with appropriate |
92 | * permissions to allow truncate (inode_newsize_ok does NOT check these | 92 | * permissions to allow truncate (inode_newsize_ok does NOT check these |
93 | * conditions). | 93 | * conditions). |
94 | */ | 94 | */ |
95 | int inode_newsize_ok(const struct inode *inode, loff_t offset) | 95 | int inode_newsize_ok(const struct inode *inode, loff_t offset) |
96 | { | 96 | { |
97 | if (inode->i_size < offset) { | 97 | if (inode->i_size < offset) { |
98 | unsigned long limit; | 98 | unsigned long limit; |
99 | 99 | ||
100 | limit = rlimit(RLIMIT_FSIZE); | 100 | limit = rlimit(RLIMIT_FSIZE); |
101 | if (limit != RLIM_INFINITY && offset > limit) | 101 | if (limit != RLIM_INFINITY && offset > limit) |
102 | goto out_sig; | 102 | goto out_sig; |
103 | if (offset > inode->i_sb->s_maxbytes) | 103 | if (offset > inode->i_sb->s_maxbytes) |
104 | goto out_big; | 104 | goto out_big; |
105 | } else { | 105 | } else { |
106 | /* | 106 | /* |
107 | * truncation of in-use swapfiles is disallowed - it would | 107 | * truncation of in-use swapfiles is disallowed - it would |
108 | * cause subsequent swapout to scribble on the now-freed | 108 | * cause subsequent swapout to scribble on the now-freed |
109 | * blocks. | 109 | * blocks. |
110 | */ | 110 | */ |
111 | if (IS_SWAPFILE(inode)) | 111 | if (IS_SWAPFILE(inode)) |
112 | return -ETXTBSY; | 112 | return -ETXTBSY; |
113 | } | 113 | } |
114 | 114 | ||
115 | return 0; | 115 | return 0; |
116 | out_sig: | 116 | out_sig: |
117 | send_sig(SIGXFSZ, current, 0); | 117 | send_sig(SIGXFSZ, current, 0); |
118 | out_big: | 118 | out_big: |
119 | return -EFBIG; | 119 | return -EFBIG; |
120 | } | 120 | } |
121 | EXPORT_SYMBOL(inode_newsize_ok); | 121 | EXPORT_SYMBOL(inode_newsize_ok); |
122 | 122 | ||
123 | /** | 123 | /** |
124 | * setattr_copy - copy simple metadata updates into the generic inode | 124 | * setattr_copy - copy simple metadata updates into the generic inode |
125 | * @inode: the inode to be updated | 125 | * @inode: the inode to be updated |
126 | * @attr: the new attributes | 126 | * @attr: the new attributes |
127 | * | 127 | * |
128 | * setattr_copy must be called with i_mutex held. | 128 | * setattr_copy must be called with i_mutex held. |
129 | * | 129 | * |
130 | * setattr_copy updates the inode's metadata with that specified | 130 | * setattr_copy updates the inode's metadata with that specified |
131 | * in attr. Noticeably missing is inode size update, which is more complex | 131 | * in attr. Noticeably missing is inode size update, which is more complex |
132 | * as it requires pagecache updates. | 132 | * as it requires pagecache updates. |
133 | * | 133 | * |
134 | * The inode is not marked as dirty after this operation. The rationale is | 134 | * The inode is not marked as dirty after this operation. The rationale is |
135 | * that for "simple" filesystems, the struct inode is the inode storage. | 135 | * that for "simple" filesystems, the struct inode is the inode storage. |
136 | * The caller is free to mark the inode dirty afterwards if needed. | 136 | * The caller is free to mark the inode dirty afterwards if needed. |
137 | */ | 137 | */ |
138 | void setattr_copy(struct inode *inode, const struct iattr *attr) | 138 | void setattr_copy(struct inode *inode, const struct iattr *attr) |
139 | { | 139 | { |
140 | unsigned int ia_valid = attr->ia_valid; | 140 | unsigned int ia_valid = attr->ia_valid; |
141 | 141 | ||
142 | if (ia_valid & ATTR_UID) | 142 | if (ia_valid & ATTR_UID) |
143 | inode->i_uid = attr->ia_uid; | 143 | inode->i_uid = attr->ia_uid; |
144 | if (ia_valid & ATTR_GID) | 144 | if (ia_valid & ATTR_GID) |
145 | inode->i_gid = attr->ia_gid; | 145 | inode->i_gid = attr->ia_gid; |
146 | if (ia_valid & ATTR_ATIME) | 146 | if (ia_valid & ATTR_ATIME) |
147 | inode->i_atime = timespec_trunc(attr->ia_atime, | 147 | inode->i_atime = timespec_trunc(attr->ia_atime, |
148 | inode->i_sb->s_time_gran); | 148 | inode->i_sb->s_time_gran); |
149 | if (ia_valid & ATTR_MTIME) | 149 | if (ia_valid & ATTR_MTIME) |
150 | inode->i_mtime = timespec_trunc(attr->ia_mtime, | 150 | inode->i_mtime = timespec_trunc(attr->ia_mtime, |
151 | inode->i_sb->s_time_gran); | 151 | inode->i_sb->s_time_gran); |
152 | if (ia_valid & ATTR_CTIME) | 152 | if (ia_valid & ATTR_CTIME) |
153 | inode->i_ctime = timespec_trunc(attr->ia_ctime, | 153 | inode->i_ctime = timespec_trunc(attr->ia_ctime, |
154 | inode->i_sb->s_time_gran); | 154 | inode->i_sb->s_time_gran); |
155 | if (ia_valid & ATTR_MODE) { | 155 | if (ia_valid & ATTR_MODE) { |
156 | umode_t mode = attr->ia_mode; | 156 | umode_t mode = attr->ia_mode; |
157 | 157 | ||
158 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | 158 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) |
159 | mode &= ~S_ISGID; | 159 | mode &= ~S_ISGID; |
160 | inode->i_mode = mode; | 160 | inode->i_mode = mode; |
161 | } | 161 | } |
162 | } | 162 | } |
163 | EXPORT_SYMBOL(setattr_copy); | 163 | EXPORT_SYMBOL(setattr_copy); |
164 | 164 | ||
165 | int notify_change(struct dentry * dentry, struct iattr * attr) | 165 | int notify_change(struct dentry * dentry, struct iattr * attr) |
166 | { | 166 | { |
167 | struct inode *inode = dentry->d_inode; | 167 | struct inode *inode = dentry->d_inode; |
168 | mode_t mode = inode->i_mode; | 168 | mode_t mode = inode->i_mode; |
169 | int error; | 169 | int error; |
170 | struct timespec now; | 170 | struct timespec now; |
171 | unsigned int ia_valid = attr->ia_valid; | 171 | unsigned int ia_valid = attr->ia_valid; |
172 | 172 | ||
173 | if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { | 173 | if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { |
174 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) | 174 | if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) |
175 | return -EPERM; | 175 | return -EPERM; |
176 | } | 176 | } |
177 | 177 | ||
178 | if ((ia_valid & ATTR_MODE)) { | 178 | if ((ia_valid & ATTR_MODE)) { |
179 | mode_t amode = attr->ia_mode; | 179 | mode_t amode = attr->ia_mode; |
180 | /* Flag setting protected by i_mutex */ | 180 | /* Flag setting protected by i_mutex */ |
181 | if (is_sxid(amode)) | 181 | if (is_sxid(amode)) |
182 | inode->i_flags &= ~S_NOSEC; | 182 | inode->i_flags &= ~S_NOSEC; |
183 | } | 183 | } |
184 | 184 | ||
185 | now = current_fs_time(inode->i_sb); | 185 | now = current_fs_time(inode->i_sb); |
186 | 186 | ||
187 | attr->ia_ctime = now; | 187 | attr->ia_ctime = now; |
188 | if (!(ia_valid & ATTR_ATIME_SET)) | 188 | if (!(ia_valid & ATTR_ATIME_SET)) |
189 | attr->ia_atime = now; | 189 | attr->ia_atime = now; |
190 | if (!(ia_valid & ATTR_MTIME_SET)) | 190 | if (!(ia_valid & ATTR_MTIME_SET)) |
191 | attr->ia_mtime = now; | 191 | attr->ia_mtime = now; |
192 | if (ia_valid & ATTR_KILL_PRIV) { | 192 | if (ia_valid & ATTR_KILL_PRIV) { |
193 | attr->ia_valid &= ~ATTR_KILL_PRIV; | 193 | attr->ia_valid &= ~ATTR_KILL_PRIV; |
194 | ia_valid &= ~ATTR_KILL_PRIV; | 194 | ia_valid &= ~ATTR_KILL_PRIV; |
195 | error = security_inode_need_killpriv(dentry); | 195 | error = security_inode_need_killpriv(dentry); |
196 | if (error > 0) | 196 | if (error > 0) |
197 | error = security_inode_killpriv(dentry); | 197 | error = security_inode_killpriv(dentry); |
198 | if (error) | 198 | if (error) |
199 | return error; | 199 | return error; |
200 | } | 200 | } |
201 | 201 | ||
202 | /* | 202 | /* |
203 | * We now pass ATTR_KILL_S*ID to the lower level setattr function so | 203 | * We now pass ATTR_KILL_S*ID to the lower level setattr function so |
204 | * that the function has the ability to reinterpret a mode change | 204 | * that the function has the ability to reinterpret a mode change |
205 | * that's due to these bits. This adds an implicit restriction that | 205 | * that's due to these bits. This adds an implicit restriction that |
206 | * no function will ever call notify_change with both ATTR_MODE and | 206 | * no function will ever call notify_change with both ATTR_MODE and |
207 | * ATTR_KILL_S*ID set. | 207 | * ATTR_KILL_S*ID set. |
208 | */ | 208 | */ |
209 | if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) && | 209 | if ((ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) && |
210 | (ia_valid & ATTR_MODE)) | 210 | (ia_valid & ATTR_MODE)) |
211 | BUG(); | 211 | BUG(); |
212 | 212 | ||
213 | if (ia_valid & ATTR_KILL_SUID) { | 213 | if (ia_valid & ATTR_KILL_SUID) { |
214 | if (mode & S_ISUID) { | 214 | if (mode & S_ISUID) { |
215 | ia_valid = attr->ia_valid |= ATTR_MODE; | 215 | ia_valid = attr->ia_valid |= ATTR_MODE; |
216 | attr->ia_mode = (inode->i_mode & ~S_ISUID); | 216 | attr->ia_mode = (inode->i_mode & ~S_ISUID); |
217 | } | 217 | } |
218 | } | 218 | } |
219 | if (ia_valid & ATTR_KILL_SGID) { | 219 | if (ia_valid & ATTR_KILL_SGID) { |
220 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { | 220 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { |
221 | if (!(ia_valid & ATTR_MODE)) { | 221 | if (!(ia_valid & ATTR_MODE)) { |
222 | ia_valid = attr->ia_valid |= ATTR_MODE; | 222 | ia_valid = attr->ia_valid |= ATTR_MODE; |
223 | attr->ia_mode = inode->i_mode; | 223 | attr->ia_mode = inode->i_mode; |
224 | } | 224 | } |
225 | attr->ia_mode &= ~S_ISGID; | 225 | attr->ia_mode &= ~S_ISGID; |
226 | } | 226 | } |
227 | } | 227 | } |
228 | if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) | 228 | if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) |
229 | return 0; | 229 | return 0; |
230 | 230 | ||
231 | error = security_inode_setattr(dentry, attr); | 231 | error = security_inode_setattr(dentry, attr); |
232 | if (error) | 232 | if (error) |
233 | return error; | 233 | return error; |
234 | 234 | ||
235 | if (ia_valid & ATTR_SIZE) | ||
236 | inode_dio_wait(inode); | ||
237 | |||
238 | if (inode->i_op->setattr) | 235 | if (inode->i_op->setattr) |
239 | error = inode->i_op->setattr(dentry, attr); | 236 | error = inode->i_op->setattr(dentry, attr); |
240 | else | 237 | else |
241 | error = simple_setattr(dentry, attr); | 238 | error = simple_setattr(dentry, attr); |
242 | 239 | ||
243 | if (!error) | 240 | if (!error) |
244 | fsnotify_change(dentry, ia_valid); | 241 | fsnotify_change(dentry, ia_valid); |
245 | 242 | ||
246 | return error; | 243 | return error; |
247 | } | 244 | } |
248 | 245 | ||
249 | EXPORT_SYMBOL(notify_change); | 246 | EXPORT_SYMBOL(notify_change); |
250 | 247 |
fs/ext2/inode.c
1 | /* | 1 | /* |
2 | * linux/fs/ext2/inode.c | 2 | * linux/fs/ext2/inode.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1993, 1994, 1995 | 4 | * Copyright (C) 1992, 1993, 1994, 1995 |
5 | * Remy Card (card@masi.ibp.fr) | 5 | * Remy Card (card@masi.ibp.fr) |
6 | * Laboratoire MASI - Institut Blaise Pascal | 6 | * Laboratoire MASI - Institut Blaise Pascal |
7 | * Universite Pierre et Marie Curie (Paris VI) | 7 | * Universite Pierre et Marie Curie (Paris VI) |
8 | * | 8 | * |
9 | * from | 9 | * from |
10 | * | 10 | * |
11 | * linux/fs/minix/inode.c | 11 | * linux/fs/minix/inode.c |
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * Goal-directed block allocation by Stephen Tweedie | 15 | * Goal-directed block allocation by Stephen Tweedie |
16 | * (sct@dcs.ed.ac.uk), 1993, 1998 | 16 | * (sct@dcs.ed.ac.uk), 1993, 1998 |
17 | * Big-endian to little-endian byte-swapping/bitmaps by | 17 | * Big-endian to little-endian byte-swapping/bitmaps by |
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | 18 | * David S. Miller (davem@caip.rutgers.edu), 1995 |
19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | 19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
20 | * (jj@sunsite.ms.mff.cuni.cz) | 20 | * (jj@sunsite.ms.mff.cuni.cz) |
21 | * | 21 | * |
22 | * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 | 22 | * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/time.h> | 25 | #include <linux/time.h> |
26 | #include <linux/highuid.h> | 26 | #include <linux/highuid.h> |
27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
28 | #include <linux/quotaops.h> | 28 | #include <linux/quotaops.h> |
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/writeback.h> | 30 | #include <linux/writeback.h> |
31 | #include <linux/buffer_head.h> | 31 | #include <linux/buffer_head.h> |
32 | #include <linux/mpage.h> | 32 | #include <linux/mpage.h> |
33 | #include <linux/fiemap.h> | 33 | #include <linux/fiemap.h> |
34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
35 | #include "ext2.h" | 35 | #include "ext2.h" |
36 | #include "acl.h" | 36 | #include "acl.h" |
37 | #include "xip.h" | 37 | #include "xip.h" |
38 | 38 | ||
39 | MODULE_AUTHOR("Remy Card and others"); | 39 | MODULE_AUTHOR("Remy Card and others"); |
40 | MODULE_DESCRIPTION("Second Extended Filesystem"); | 40 | MODULE_DESCRIPTION("Second Extended Filesystem"); |
41 | MODULE_LICENSE("GPL"); | 41 | MODULE_LICENSE("GPL"); |
42 | 42 | ||
43 | static int __ext2_write_inode(struct inode *inode, int do_sync); | 43 | static int __ext2_write_inode(struct inode *inode, int do_sync); |
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Test whether an inode is a fast symlink. | 46 | * Test whether an inode is a fast symlink. |
47 | */ | 47 | */ |
48 | static inline int ext2_inode_is_fast_symlink(struct inode *inode) | 48 | static inline int ext2_inode_is_fast_symlink(struct inode *inode) |
49 | { | 49 | { |
50 | int ea_blocks = EXT2_I(inode)->i_file_acl ? | 50 | int ea_blocks = EXT2_I(inode)->i_file_acl ? |
51 | (inode->i_sb->s_blocksize >> 9) : 0; | 51 | (inode->i_sb->s_blocksize >> 9) : 0; |
52 | 52 | ||
53 | return (S_ISLNK(inode->i_mode) && | 53 | return (S_ISLNK(inode->i_mode) && |
54 | inode->i_blocks - ea_blocks == 0); | 54 | inode->i_blocks - ea_blocks == 0); |
55 | } | 55 | } |
56 | 56 | ||
57 | static void ext2_truncate_blocks(struct inode *inode, loff_t offset); | 57 | static void ext2_truncate_blocks(struct inode *inode, loff_t offset); |
58 | 58 | ||
59 | static void ext2_write_failed(struct address_space *mapping, loff_t to) | 59 | static void ext2_write_failed(struct address_space *mapping, loff_t to) |
60 | { | 60 | { |
61 | struct inode *inode = mapping->host; | 61 | struct inode *inode = mapping->host; |
62 | 62 | ||
63 | if (to > inode->i_size) { | 63 | if (to > inode->i_size) { |
64 | truncate_pagecache(inode, to, inode->i_size); | 64 | truncate_pagecache(inode, to, inode->i_size); |
65 | ext2_truncate_blocks(inode, inode->i_size); | 65 | ext2_truncate_blocks(inode, inode->i_size); |
66 | } | 66 | } |
67 | } | 67 | } |
68 | 68 | ||
69 | /* | 69 | /* |
70 | * Called at the last iput() if i_nlink is zero. | 70 | * Called at the last iput() if i_nlink is zero. |
71 | */ | 71 | */ |
72 | void ext2_evict_inode(struct inode * inode) | 72 | void ext2_evict_inode(struct inode * inode) |
73 | { | 73 | { |
74 | struct ext2_block_alloc_info *rsv; | 74 | struct ext2_block_alloc_info *rsv; |
75 | int want_delete = 0; | 75 | int want_delete = 0; |
76 | 76 | ||
77 | if (!inode->i_nlink && !is_bad_inode(inode)) { | 77 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
78 | want_delete = 1; | 78 | want_delete = 1; |
79 | dquot_initialize(inode); | 79 | dquot_initialize(inode); |
80 | } else { | 80 | } else { |
81 | dquot_drop(inode); | 81 | dquot_drop(inode); |
82 | } | 82 | } |
83 | 83 | ||
84 | truncate_inode_pages(&inode->i_data, 0); | 84 | truncate_inode_pages(&inode->i_data, 0); |
85 | 85 | ||
86 | if (want_delete) { | 86 | if (want_delete) { |
87 | /* set dtime */ | 87 | /* set dtime */ |
88 | EXT2_I(inode)->i_dtime = get_seconds(); | 88 | EXT2_I(inode)->i_dtime = get_seconds(); |
89 | mark_inode_dirty(inode); | 89 | mark_inode_dirty(inode); |
90 | __ext2_write_inode(inode, inode_needs_sync(inode)); | 90 | __ext2_write_inode(inode, inode_needs_sync(inode)); |
91 | /* truncate to 0 */ | 91 | /* truncate to 0 */ |
92 | inode->i_size = 0; | 92 | inode->i_size = 0; |
93 | if (inode->i_blocks) | 93 | if (inode->i_blocks) |
94 | ext2_truncate_blocks(inode, 0); | 94 | ext2_truncate_blocks(inode, 0); |
95 | } | 95 | } |
96 | 96 | ||
97 | invalidate_inode_buffers(inode); | 97 | invalidate_inode_buffers(inode); |
98 | end_writeback(inode); | 98 | end_writeback(inode); |
99 | 99 | ||
100 | ext2_discard_reservation(inode); | 100 | ext2_discard_reservation(inode); |
101 | rsv = EXT2_I(inode)->i_block_alloc_info; | 101 | rsv = EXT2_I(inode)->i_block_alloc_info; |
102 | EXT2_I(inode)->i_block_alloc_info = NULL; | 102 | EXT2_I(inode)->i_block_alloc_info = NULL; |
103 | if (unlikely(rsv)) | 103 | if (unlikely(rsv)) |
104 | kfree(rsv); | 104 | kfree(rsv); |
105 | 105 | ||
106 | if (want_delete) | 106 | if (want_delete) |
107 | ext2_free_inode(inode); | 107 | ext2_free_inode(inode); |
108 | } | 108 | } |
109 | 109 | ||
110 | typedef struct { | 110 | typedef struct { |
111 | __le32 *p; | 111 | __le32 *p; |
112 | __le32 key; | 112 | __le32 key; |
113 | struct buffer_head *bh; | 113 | struct buffer_head *bh; |
114 | } Indirect; | 114 | } Indirect; |
115 | 115 | ||
116 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | 116 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) |
117 | { | 117 | { |
118 | p->key = *(p->p = v); | 118 | p->key = *(p->p = v); |
119 | p->bh = bh; | 119 | p->bh = bh; |
120 | } | 120 | } |
121 | 121 | ||
122 | static inline int verify_chain(Indirect *from, Indirect *to) | 122 | static inline int verify_chain(Indirect *from, Indirect *to) |
123 | { | 123 | { |
124 | while (from <= to && from->key == *from->p) | 124 | while (from <= to && from->key == *from->p) |
125 | from++; | 125 | from++; |
126 | return (from > to); | 126 | return (from > to); |
127 | } | 127 | } |
128 | 128 | ||
129 | /** | 129 | /** |
130 | * ext2_block_to_path - parse the block number into array of offsets | 130 | * ext2_block_to_path - parse the block number into array of offsets |
131 | * @inode: inode in question (we are only interested in its superblock) | 131 | * @inode: inode in question (we are only interested in its superblock) |
132 | * @i_block: block number to be parsed | 132 | * @i_block: block number to be parsed |
133 | * @offsets: array to store the offsets in | 133 | * @offsets: array to store the offsets in |
134 | * @boundary: set this non-zero if the referred-to block is likely to be | 134 | * @boundary: set this non-zero if the referred-to block is likely to be |
135 | * followed (on disk) by an indirect block. | 135 | * followed (on disk) by an indirect block. |
136 | * To store the locations of file's data ext2 uses a data structure common | 136 | * To store the locations of file's data ext2 uses a data structure common |
137 | * for UNIX filesystems - tree of pointers anchored in the inode, with | 137 | * for UNIX filesystems - tree of pointers anchored in the inode, with |
138 | * data blocks at leaves and indirect blocks in intermediate nodes. | 138 | * data blocks at leaves and indirect blocks in intermediate nodes. |
139 | * This function translates the block number into path in that tree - | 139 | * This function translates the block number into path in that tree - |
140 | * return value is the path length and @offsets[n] is the offset of | 140 | * return value is the path length and @offsets[n] is the offset of |
141 | * pointer to (n+1)th node in the nth one. If @block is out of range | 141 | * pointer to (n+1)th node in the nth one. If @block is out of range |
142 | * (negative or too large) warning is printed and zero returned. | 142 | * (negative or too large) warning is printed and zero returned. |
143 | * | 143 | * |
144 | * Note: function doesn't find node addresses, so no IO is needed. All | 144 | * Note: function doesn't find node addresses, so no IO is needed. All |
145 | * we need to know is the capacity of indirect blocks (taken from the | 145 | * we need to know is the capacity of indirect blocks (taken from the |
146 | * inode->i_sb). | 146 | * inode->i_sb). |
147 | */ | 147 | */ |
148 | 148 | ||
149 | /* | 149 | /* |
150 | * Portability note: the last comparison (check that we fit into triple | 150 | * Portability note: the last comparison (check that we fit into triple |
151 | * indirect block) is spelled differently, because otherwise on an | 151 | * indirect block) is spelled differently, because otherwise on an |
152 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | 152 | * architecture with 32-bit longs and 8Kb pages we might get into trouble |
153 | * if our filesystem had 8Kb blocks. We might use long long, but that would | 153 | * if our filesystem had 8Kb blocks. We might use long long, but that would |
154 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | 154 | * kill us on x86. Oh, well, at least the sign propagation does not matter - |
155 | * i_block would have to be negative in the very beginning, so we would not | 155 | * i_block would have to be negative in the very beginning, so we would not |
156 | * get there at all. | 156 | * get there at all. |
157 | */ | 157 | */ |
158 | 158 | ||
159 | static int ext2_block_to_path(struct inode *inode, | 159 | static int ext2_block_to_path(struct inode *inode, |
160 | long i_block, int offsets[4], int *boundary) | 160 | long i_block, int offsets[4], int *boundary) |
161 | { | 161 | { |
162 | int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb); | 162 | int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb); |
163 | int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); | 163 | int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); |
164 | const long direct_blocks = EXT2_NDIR_BLOCKS, | 164 | const long direct_blocks = EXT2_NDIR_BLOCKS, |
165 | indirect_blocks = ptrs, | 165 | indirect_blocks = ptrs, |
166 | double_blocks = (1 << (ptrs_bits * 2)); | 166 | double_blocks = (1 << (ptrs_bits * 2)); |
167 | int n = 0; | 167 | int n = 0; |
168 | int final = 0; | 168 | int final = 0; |
169 | 169 | ||
170 | if (i_block < 0) { | 170 | if (i_block < 0) { |
171 | ext2_msg(inode->i_sb, KERN_WARNING, | 171 | ext2_msg(inode->i_sb, KERN_WARNING, |
172 | "warning: %s: block < 0", __func__); | 172 | "warning: %s: block < 0", __func__); |
173 | } else if (i_block < direct_blocks) { | 173 | } else if (i_block < direct_blocks) { |
174 | offsets[n++] = i_block; | 174 | offsets[n++] = i_block; |
175 | final = direct_blocks; | 175 | final = direct_blocks; |
176 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | 176 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { |
177 | offsets[n++] = EXT2_IND_BLOCK; | 177 | offsets[n++] = EXT2_IND_BLOCK; |
178 | offsets[n++] = i_block; | 178 | offsets[n++] = i_block; |
179 | final = ptrs; | 179 | final = ptrs; |
180 | } else if ((i_block -= indirect_blocks) < double_blocks) { | 180 | } else if ((i_block -= indirect_blocks) < double_blocks) { |
181 | offsets[n++] = EXT2_DIND_BLOCK; | 181 | offsets[n++] = EXT2_DIND_BLOCK; |
182 | offsets[n++] = i_block >> ptrs_bits; | 182 | offsets[n++] = i_block >> ptrs_bits; |
183 | offsets[n++] = i_block & (ptrs - 1); | 183 | offsets[n++] = i_block & (ptrs - 1); |
184 | final = ptrs; | 184 | final = ptrs; |
185 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | 185 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { |
186 | offsets[n++] = EXT2_TIND_BLOCK; | 186 | offsets[n++] = EXT2_TIND_BLOCK; |
187 | offsets[n++] = i_block >> (ptrs_bits * 2); | 187 | offsets[n++] = i_block >> (ptrs_bits * 2); |
188 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | 188 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); |
189 | offsets[n++] = i_block & (ptrs - 1); | 189 | offsets[n++] = i_block & (ptrs - 1); |
190 | final = ptrs; | 190 | final = ptrs; |
191 | } else { | 191 | } else { |
192 | ext2_msg(inode->i_sb, KERN_WARNING, | 192 | ext2_msg(inode->i_sb, KERN_WARNING, |
193 | "warning: %s: block is too big", __func__); | 193 | "warning: %s: block is too big", __func__); |
194 | } | 194 | } |
195 | if (boundary) | 195 | if (boundary) |
196 | *boundary = final - 1 - (i_block & (ptrs - 1)); | 196 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
197 | 197 | ||
198 | return n; | 198 | return n; |
199 | } | 199 | } |
200 | 200 | ||
/**
 *	ext2_get_branch - read the chain of indirect blocks leading to data
 *	@inode: inode in question
 *	@depth: depth of the chain (1 - direct pointer, etc.)
 *	@offsets: offsets of pointers in inode/indirect blocks
 *	@chain: place to store the result
 *	@err: here we store the error value
 *
 *	Function fills the array of triples <key, p, bh> and returns %NULL
 *	if everything went OK or the pointer to the last filled triple
 *	(incomplete one) otherwise. Upon the return chain[i].key contains
 *	the number of (i+1)-th block in the chain (as it is stored in memory,
 *	i.e. little-endian 32-bit), chain[i].p contains the address of that
 *	number (it points into struct inode for i==0 and into the bh->b_data
 *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
 *	block for i>0 and NULL for i==0. In other words, it holds the block
 *	numbers of the chain, addresses they were taken from (and where we can
 *	verify that chain did not change) and buffer_heads hosting these
 *	numbers.
 *
 *	Function stops when it stumbles upon zero pointer (absent block)
 *		(pointer to last triple returned, *@err == 0)
 *	or when it gets an IO error reading an indirect block
 *		(ditto, *@err == -EIO)
 *	or when it notices that chain had been changed while it was reading
 *		(ditto, *@err == -EAGAIN)
 *	or when it reads all @depth-1 indirect blocks successfully and finds
 *	the whole chain, all way to the data (returns %NULL, *err == 0).
 */
static Indirect *ext2_get_branch(struct inode *inode,
				 int depth,
				 int *offsets,
				 Indirect chain[4],
				 int *err)
{
	struct super_block *sb = inode->i_sb;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain (chain, NULL, EXT2_I(inode)->i_data + *offsets);
	if (!p->key)
		goto no_block;
	while (--depth) {
		/* Read the next-level indirect block named by p->key. */
		bh = sb_bread(sb, le32_to_cpu(p->key));
		if (!bh)
			goto failure;
		/*
		 * Re-validate under i_meta_lock: a concurrent truncate may
		 * have cut this branch while sb_bread() above slept, in
		 * which case the block we just read is stale.
		 */
		read_lock(&EXT2_I(inode)->i_meta_lock);
		if (!verify_chain(chain, p))
			goto changed;
		add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
		read_unlock(&EXT2_I(inode)->i_meta_lock);
		if (!p->key)
			goto no_block;
	}
	/* Whole chain read successfully, all the way to the data block. */
	return NULL;

changed:
	/* Chain mutated under us: drop the buffer and let caller retry. */
	read_unlock(&EXT2_I(inode)->i_meta_lock);
	brelse(bh);
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	/* Last (incomplete) triple; *err says why we stopped. */
	return p;
}
269 | 269 | ||
270 | /** | 270 | /** |
271 | * ext2_find_near - find a place for allocation with sufficient locality | 271 | * ext2_find_near - find a place for allocation with sufficient locality |
272 | * @inode: owner | 272 | * @inode: owner |
273 | * @ind: descriptor of indirect block. | 273 | * @ind: descriptor of indirect block. |
274 | * | 274 | * |
275 | * This function returns the preferred place for block allocation. | 275 | * This function returns the preferred place for block allocation. |
276 | * It is used when heuristic for sequential allocation fails. | 276 | * It is used when heuristic for sequential allocation fails. |
277 | * Rules are: | 277 | * Rules are: |
278 | * + if there is a block to the left of our position - allocate near it. | 278 | * + if there is a block to the left of our position - allocate near it. |
279 | * + if pointer will live in indirect block - allocate near that block. | 279 | * + if pointer will live in indirect block - allocate near that block. |
280 | * + if pointer will live in inode - allocate in the same cylinder group. | 280 | * + if pointer will live in inode - allocate in the same cylinder group. |
281 | * | 281 | * |
282 | * In the latter case we colour the starting block by the callers PID to | 282 | * In the latter case we colour the starting block by the callers PID to |
283 | * prevent it from clashing with concurrent allocations for a different inode | 283 | * prevent it from clashing with concurrent allocations for a different inode |
284 | * in the same block group. The PID is used here so that functionally related | 284 | * in the same block group. The PID is used here so that functionally related |
285 | * files will be close-by on-disk. | 285 | * files will be close-by on-disk. |
286 | * | 286 | * |
287 | * Caller must make sure that @ind is valid and will stay that way. | 287 | * Caller must make sure that @ind is valid and will stay that way. |
288 | */ | 288 | */ |
289 | 289 | ||
290 | static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind) | 290 | static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind) |
291 | { | 291 | { |
292 | struct ext2_inode_info *ei = EXT2_I(inode); | 292 | struct ext2_inode_info *ei = EXT2_I(inode); |
293 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | 293 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
294 | __le32 *p; | 294 | __le32 *p; |
295 | ext2_fsblk_t bg_start; | 295 | ext2_fsblk_t bg_start; |
296 | ext2_fsblk_t colour; | 296 | ext2_fsblk_t colour; |
297 | 297 | ||
298 | /* Try to find previous block */ | 298 | /* Try to find previous block */ |
299 | for (p = ind->p - 1; p >= start; p--) | 299 | for (p = ind->p - 1; p >= start; p--) |
300 | if (*p) | 300 | if (*p) |
301 | return le32_to_cpu(*p); | 301 | return le32_to_cpu(*p); |
302 | 302 | ||
303 | /* No such thing, so let's try location of indirect block */ | 303 | /* No such thing, so let's try location of indirect block */ |
304 | if (ind->bh) | 304 | if (ind->bh) |
305 | return ind->bh->b_blocknr; | 305 | return ind->bh->b_blocknr; |
306 | 306 | ||
307 | /* | 307 | /* |
308 | * It is going to be referred from inode itself? OK, just put it into | 308 | * It is going to be referred from inode itself? OK, just put it into |
309 | * the same cylinder group then. | 309 | * the same cylinder group then. |
310 | */ | 310 | */ |
311 | bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); | 311 | bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); |
312 | colour = (current->pid % 16) * | 312 | colour = (current->pid % 16) * |
313 | (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); | 313 | (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); |
314 | return bg_start + colour; | 314 | return bg_start + colour; |
315 | } | 315 | } |
316 | 316 | ||
317 | /** | 317 | /** |
318 | * ext2_find_goal - find a preferred place for allocation. | 318 | * ext2_find_goal - find a preferred place for allocation. |
319 | * @inode: owner | 319 | * @inode: owner |
320 | * @block: block we want | 320 | * @block: block we want |
321 | * @partial: pointer to the last triple within a chain | 321 | * @partial: pointer to the last triple within a chain |
322 | * | 322 | * |
323 | * Returns preferred place for a block (the goal). | 323 | * Returns preferred place for a block (the goal). |
324 | */ | 324 | */ |
325 | 325 | ||
326 | static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block, | 326 | static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block, |
327 | Indirect *partial) | 327 | Indirect *partial) |
328 | { | 328 | { |
329 | struct ext2_block_alloc_info *block_i; | 329 | struct ext2_block_alloc_info *block_i; |
330 | 330 | ||
331 | block_i = EXT2_I(inode)->i_block_alloc_info; | 331 | block_i = EXT2_I(inode)->i_block_alloc_info; |
332 | 332 | ||
333 | /* | 333 | /* |
334 | * try the heuristic for sequential allocation, | 334 | * try the heuristic for sequential allocation, |
335 | * failing that at least try to get decent locality. | 335 | * failing that at least try to get decent locality. |
336 | */ | 336 | */ |
337 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | 337 | if (block_i && (block == block_i->last_alloc_logical_block + 1) |
338 | && (block_i->last_alloc_physical_block != 0)) { | 338 | && (block_i->last_alloc_physical_block != 0)) { |
339 | return block_i->last_alloc_physical_block + 1; | 339 | return block_i->last_alloc_physical_block + 1; |
340 | } | 340 | } |
341 | 341 | ||
342 | return ext2_find_near(inode, partial); | 342 | return ext2_find_near(inode, partial); |
343 | } | 343 | } |
344 | 344 | ||
345 | /** | 345 | /** |
346 | * ext2_blks_to_allocate: Look up the block map and count the number | 346 | * ext2_blks_to_allocate: Look up the block map and count the number |
347 | * of direct blocks need to be allocated for the given branch. | 347 | * of direct blocks need to be allocated for the given branch. |
348 | * | 348 | * |
349 | * @branch: chain of indirect blocks | 349 | * @branch: chain of indirect blocks |
350 | * @k: number of blocks need for indirect blocks | 350 | * @k: number of blocks need for indirect blocks |
351 | * @blks: number of data blocks to be mapped. | 351 | * @blks: number of data blocks to be mapped. |
352 | * @blocks_to_boundary: the offset in the indirect block | 352 | * @blocks_to_boundary: the offset in the indirect block |
353 | * | 353 | * |
354 | * return the total number of blocks to be allocate, including the | 354 | * return the total number of blocks to be allocate, including the |
355 | * direct and indirect blocks. | 355 | * direct and indirect blocks. |
356 | */ | 356 | */ |
357 | static int | 357 | static int |
358 | ext2_blks_to_allocate(Indirect * branch, int k, unsigned long blks, | 358 | ext2_blks_to_allocate(Indirect * branch, int k, unsigned long blks, |
359 | int blocks_to_boundary) | 359 | int blocks_to_boundary) |
360 | { | 360 | { |
361 | unsigned long count = 0; | 361 | unsigned long count = 0; |
362 | 362 | ||
363 | /* | 363 | /* |
364 | * Simple case, [t,d]Indirect block(s) has not allocated yet | 364 | * Simple case, [t,d]Indirect block(s) has not allocated yet |
365 | * then it's clear blocks on that path have not allocated | 365 | * then it's clear blocks on that path have not allocated |
366 | */ | 366 | */ |
367 | if (k > 0) { | 367 | if (k > 0) { |
368 | /* right now don't hanel cross boundary allocation */ | 368 | /* right now don't hanel cross boundary allocation */ |
369 | if (blks < blocks_to_boundary + 1) | 369 | if (blks < blocks_to_boundary + 1) |
370 | count += blks; | 370 | count += blks; |
371 | else | 371 | else |
372 | count += blocks_to_boundary + 1; | 372 | count += blocks_to_boundary + 1; |
373 | return count; | 373 | return count; |
374 | } | 374 | } |
375 | 375 | ||
376 | count++; | 376 | count++; |
377 | while (count < blks && count <= blocks_to_boundary | 377 | while (count < blks && count <= blocks_to_boundary |
378 | && le32_to_cpu(*(branch[0].p + count)) == 0) { | 378 | && le32_to_cpu(*(branch[0].p + count)) == 0) { |
379 | count++; | 379 | count++; |
380 | } | 380 | } |
381 | return count; | 381 | return count; |
382 | } | 382 | } |
383 | 383 | ||
384 | /** | 384 | /** |
385 | * ext2_alloc_blocks: multiple allocate blocks needed for a branch | 385 | * ext2_alloc_blocks: multiple allocate blocks needed for a branch |
386 | * @indirect_blks: the number of blocks need to allocate for indirect | 386 | * @indirect_blks: the number of blocks need to allocate for indirect |
387 | * blocks | 387 | * blocks |
388 | * | 388 | * |
389 | * @new_blocks: on return it will store the new block numbers for | 389 | * @new_blocks: on return it will store the new block numbers for |
390 | * the indirect blocks(if needed) and the first direct block, | 390 | * the indirect blocks(if needed) and the first direct block, |
391 | * @blks: on return it will store the total number of allocated | 391 | * @blks: on return it will store the total number of allocated |
392 | * direct blocks | 392 | * direct blocks |
393 | */ | 393 | */ |
394 | static int ext2_alloc_blocks(struct inode *inode, | 394 | static int ext2_alloc_blocks(struct inode *inode, |
395 | ext2_fsblk_t goal, int indirect_blks, int blks, | 395 | ext2_fsblk_t goal, int indirect_blks, int blks, |
396 | ext2_fsblk_t new_blocks[4], int *err) | 396 | ext2_fsblk_t new_blocks[4], int *err) |
397 | { | 397 | { |
398 | int target, i; | 398 | int target, i; |
399 | unsigned long count = 0; | 399 | unsigned long count = 0; |
400 | int index = 0; | 400 | int index = 0; |
401 | ext2_fsblk_t current_block = 0; | 401 | ext2_fsblk_t current_block = 0; |
402 | int ret = 0; | 402 | int ret = 0; |
403 | 403 | ||
404 | /* | 404 | /* |
405 | * Here we try to allocate the requested multiple blocks at once, | 405 | * Here we try to allocate the requested multiple blocks at once, |
406 | * on a best-effort basis. | 406 | * on a best-effort basis. |
407 | * To build a branch, we should allocate blocks for | 407 | * To build a branch, we should allocate blocks for |
408 | * the indirect blocks(if not allocated yet), and at least | 408 | * the indirect blocks(if not allocated yet), and at least |
409 | * the first direct block of this branch. That's the | 409 | * the first direct block of this branch. That's the |
410 | * minimum number of blocks need to allocate(required) | 410 | * minimum number of blocks need to allocate(required) |
411 | */ | 411 | */ |
412 | target = blks + indirect_blks; | 412 | target = blks + indirect_blks; |
413 | 413 | ||
414 | while (1) { | 414 | while (1) { |
415 | count = target; | 415 | count = target; |
416 | /* allocating blocks for indirect blocks and direct blocks */ | 416 | /* allocating blocks for indirect blocks and direct blocks */ |
417 | current_block = ext2_new_blocks(inode,goal,&count,err); | 417 | current_block = ext2_new_blocks(inode,goal,&count,err); |
418 | if (*err) | 418 | if (*err) |
419 | goto failed_out; | 419 | goto failed_out; |
420 | 420 | ||
421 | target -= count; | 421 | target -= count; |
422 | /* allocate blocks for indirect blocks */ | 422 | /* allocate blocks for indirect blocks */ |
423 | while (index < indirect_blks && count) { | 423 | while (index < indirect_blks && count) { |
424 | new_blocks[index++] = current_block++; | 424 | new_blocks[index++] = current_block++; |
425 | count--; | 425 | count--; |
426 | } | 426 | } |
427 | 427 | ||
428 | if (count > 0) | 428 | if (count > 0) |
429 | break; | 429 | break; |
430 | } | 430 | } |
431 | 431 | ||
432 | /* save the new block number for the first direct block */ | 432 | /* save the new block number for the first direct block */ |
433 | new_blocks[index] = current_block; | 433 | new_blocks[index] = current_block; |
434 | 434 | ||
435 | /* total number of blocks allocated for direct blocks */ | 435 | /* total number of blocks allocated for direct blocks */ |
436 | ret = count; | 436 | ret = count; |
437 | *err = 0; | 437 | *err = 0; |
438 | return ret; | 438 | return ret; |
439 | failed_out: | 439 | failed_out: |
440 | for (i = 0; i <index; i++) | 440 | for (i = 0; i <index; i++) |
441 | ext2_free_blocks(inode, new_blocks[i], 1); | 441 | ext2_free_blocks(inode, new_blocks[i], 1); |
442 | if (index) | 442 | if (index) |
443 | mark_inode_dirty(inode); | 443 | mark_inode_dirty(inode); |
444 | return ret; | 444 | return ret; |
445 | } | 445 | } |
446 | 446 | ||
447 | /** | 447 | /** |
448 | * ext2_alloc_branch - allocate and set up a chain of blocks. | 448 | * ext2_alloc_branch - allocate and set up a chain of blocks. |
449 | * @inode: owner | 449 | * @inode: owner |
450 | * @num: depth of the chain (number of blocks to allocate) | 450 | * @num: depth of the chain (number of blocks to allocate) |
451 | * @offsets: offsets (in the blocks) to store the pointers to next. | 451 | * @offsets: offsets (in the blocks) to store the pointers to next. |
452 | * @branch: place to store the chain in. | 452 | * @branch: place to store the chain in. |
453 | * | 453 | * |
454 | * This function allocates @num blocks, zeroes out all but the last one, | 454 | * This function allocates @num blocks, zeroes out all but the last one, |
455 | * links them into chain and (if we are synchronous) writes them to disk. | 455 | * links them into chain and (if we are synchronous) writes them to disk. |
456 | * In other words, it prepares a branch that can be spliced onto the | 456 | * In other words, it prepares a branch that can be spliced onto the |
457 | * inode. It stores the information about that chain in the branch[], in | 457 | * inode. It stores the information about that chain in the branch[], in |
458 | * the same format as ext2_get_branch() would do. We are calling it after | 458 | * the same format as ext2_get_branch() would do. We are calling it after |
459 | * we had read the existing part of chain and partial points to the last | 459 | * we had read the existing part of chain and partial points to the last |
460 | * triple of that (one with zero ->key). Upon the exit we have the same | 460 | * triple of that (one with zero ->key). Upon the exit we have the same |
461 | * picture as after the successful ext2_get_block(), except that in one | 461 | * picture as after the successful ext2_get_block(), except that in one |
462 | * place chain is disconnected - *branch->p is still zero (we did not | 462 | * place chain is disconnected - *branch->p is still zero (we did not |
463 | * set the last link), but branch->key contains the number that should | 463 | * set the last link), but branch->key contains the number that should |
464 | * be placed into *branch->p to fill that gap. | 464 | * be placed into *branch->p to fill that gap. |
465 | * | 465 | * |
466 | * If allocation fails we free all blocks we've allocated (and forget | 466 | * If allocation fails we free all blocks we've allocated (and forget |
467 | * their buffer_heads) and return the error value the from failed | 467 | * their buffer_heads) and return the error value the from failed |
468 | * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain | 468 | * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain |
469 | * as described above and return 0. | 469 | * as described above and return 0. |
470 | */ | 470 | */ |
471 | 471 | ||
472 | static int ext2_alloc_branch(struct inode *inode, | 472 | static int ext2_alloc_branch(struct inode *inode, |
473 | int indirect_blks, int *blks, ext2_fsblk_t goal, | 473 | int indirect_blks, int *blks, ext2_fsblk_t goal, |
474 | int *offsets, Indirect *branch) | 474 | int *offsets, Indirect *branch) |
475 | { | 475 | { |
476 | int blocksize = inode->i_sb->s_blocksize; | 476 | int blocksize = inode->i_sb->s_blocksize; |
477 | int i, n = 0; | 477 | int i, n = 0; |
478 | int err = 0; | 478 | int err = 0; |
479 | struct buffer_head *bh; | 479 | struct buffer_head *bh; |
480 | int num; | 480 | int num; |
481 | ext2_fsblk_t new_blocks[4]; | 481 | ext2_fsblk_t new_blocks[4]; |
482 | ext2_fsblk_t current_block; | 482 | ext2_fsblk_t current_block; |
483 | 483 | ||
484 | num = ext2_alloc_blocks(inode, goal, indirect_blks, | 484 | num = ext2_alloc_blocks(inode, goal, indirect_blks, |
485 | *blks, new_blocks, &err); | 485 | *blks, new_blocks, &err); |
486 | if (err) | 486 | if (err) |
487 | return err; | 487 | return err; |
488 | 488 | ||
489 | branch[0].key = cpu_to_le32(new_blocks[0]); | 489 | branch[0].key = cpu_to_le32(new_blocks[0]); |
490 | /* | 490 | /* |
491 | * metadata blocks and data blocks are allocated. | 491 | * metadata blocks and data blocks are allocated. |
492 | */ | 492 | */ |
493 | for (n = 1; n <= indirect_blks; n++) { | 493 | for (n = 1; n <= indirect_blks; n++) { |
494 | /* | 494 | /* |
495 | * Get buffer_head for parent block, zero it out | 495 | * Get buffer_head for parent block, zero it out |
496 | * and set the pointer to new one, then send | 496 | * and set the pointer to new one, then send |
497 | * parent to disk. | 497 | * parent to disk. |
498 | */ | 498 | */ |
499 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 499 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
500 | branch[n].bh = bh; | 500 | branch[n].bh = bh; |
501 | lock_buffer(bh); | 501 | lock_buffer(bh); |
502 | memset(bh->b_data, 0, blocksize); | 502 | memset(bh->b_data, 0, blocksize); |
503 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 503 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
504 | branch[n].key = cpu_to_le32(new_blocks[n]); | 504 | branch[n].key = cpu_to_le32(new_blocks[n]); |
505 | *branch[n].p = branch[n].key; | 505 | *branch[n].p = branch[n].key; |
506 | if ( n == indirect_blks) { | 506 | if ( n == indirect_blks) { |
507 | current_block = new_blocks[n]; | 507 | current_block = new_blocks[n]; |
508 | /* | 508 | /* |
509 | * End of chain, update the last new metablock of | 509 | * End of chain, update the last new metablock of |
510 | * the chain to point to the new allocated | 510 | * the chain to point to the new allocated |
511 | * data blocks numbers | 511 | * data blocks numbers |
512 | */ | 512 | */ |
513 | for (i=1; i < num; i++) | 513 | for (i=1; i < num; i++) |
514 | *(branch[n].p + i) = cpu_to_le32(++current_block); | 514 | *(branch[n].p + i) = cpu_to_le32(++current_block); |
515 | } | 515 | } |
516 | set_buffer_uptodate(bh); | 516 | set_buffer_uptodate(bh); |
517 | unlock_buffer(bh); | 517 | unlock_buffer(bh); |
518 | mark_buffer_dirty_inode(bh, inode); | 518 | mark_buffer_dirty_inode(bh, inode); |
519 | /* We used to sync bh here if IS_SYNC(inode). | 519 | /* We used to sync bh here if IS_SYNC(inode). |
520 | * But we now rely upon generic_write_sync() | 520 | * But we now rely upon generic_write_sync() |
521 | * and b_inode_buffers. But not for directories. | 521 | * and b_inode_buffers. But not for directories. |
522 | */ | 522 | */ |
523 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) | 523 | if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) |
524 | sync_dirty_buffer(bh); | 524 | sync_dirty_buffer(bh); |
525 | } | 525 | } |
526 | *blks = num; | 526 | *blks = num; |
527 | return err; | 527 | return err; |
528 | } | 528 | } |
529 | 529 | ||
530 | /** | 530 | /** |
531 | * ext2_splice_branch - splice the allocated branch onto inode. | 531 | * ext2_splice_branch - splice the allocated branch onto inode. |
532 | * @inode: owner | 532 | * @inode: owner |
533 | * @block: (logical) number of block we are adding | 533 | * @block: (logical) number of block we are adding |
534 | * @where: location of missing link | 534 | * @where: location of missing link |
535 | * @num: number of indirect blocks we are adding | 535 | * @num: number of indirect blocks we are adding |
536 | * @blks: number of direct blocks we are adding | 536 | * @blks: number of direct blocks we are adding |
537 | * | 537 | * |
538 | * This function fills the missing link and does all housekeeping needed in | 538 | * This function fills the missing link and does all housekeeping needed in |
539 | * inode (->i_blocks, etc.). In case of success we end up with the full | 539 | * inode (->i_blocks, etc.). In case of success we end up with the full |
540 | * chain to new block and return 0. | 540 | * chain to new block and return 0. |
541 | */ | 541 | */ |
542 | static void ext2_splice_branch(struct inode *inode, | 542 | static void ext2_splice_branch(struct inode *inode, |
543 | long block, Indirect *where, int num, int blks) | 543 | long block, Indirect *where, int num, int blks) |
544 | { | 544 | { |
545 | int i; | 545 | int i; |
546 | struct ext2_block_alloc_info *block_i; | 546 | struct ext2_block_alloc_info *block_i; |
547 | ext2_fsblk_t current_block; | 547 | ext2_fsblk_t current_block; |
548 | 548 | ||
549 | block_i = EXT2_I(inode)->i_block_alloc_info; | 549 | block_i = EXT2_I(inode)->i_block_alloc_info; |
550 | 550 | ||
551 | /* XXX LOCKING probably should have i_meta_lock ?*/ | 551 | /* XXX LOCKING probably should have i_meta_lock ?*/ |
552 | /* That's it */ | 552 | /* That's it */ |
553 | 553 | ||
554 | *where->p = where->key; | 554 | *where->p = where->key; |
555 | 555 | ||
556 | /* | 556 | /* |
557 | * Update the host buffer_head or inode to point to more just allocated | 557 | * Update the host buffer_head or inode to point to more just allocated |
558 | * direct blocks blocks | 558 | * direct blocks blocks |
559 | */ | 559 | */ |
560 | if (num == 0 && blks > 1) { | 560 | if (num == 0 && blks > 1) { |
561 | current_block = le32_to_cpu(where->key) + 1; | 561 | current_block = le32_to_cpu(where->key) + 1; |
562 | for (i = 1; i < blks; i++) | 562 | for (i = 1; i < blks; i++) |
563 | *(where->p + i ) = cpu_to_le32(current_block++); | 563 | *(where->p + i ) = cpu_to_le32(current_block++); |
564 | } | 564 | } |
565 | 565 | ||
566 | /* | 566 | /* |
567 | * update the most recently allocated logical & physical block | 567 | * update the most recently allocated logical & physical block |
568 | * in i_block_alloc_info, to assist find the proper goal block for next | 568 | * in i_block_alloc_info, to assist find the proper goal block for next |
569 | * allocation | 569 | * allocation |
570 | */ | 570 | */ |
571 | if (block_i) { | 571 | if (block_i) { |
572 | block_i->last_alloc_logical_block = block + blks - 1; | 572 | block_i->last_alloc_logical_block = block + blks - 1; |
573 | block_i->last_alloc_physical_block = | 573 | block_i->last_alloc_physical_block = |
574 | le32_to_cpu(where[num].key) + blks - 1; | 574 | le32_to_cpu(where[num].key) + blks - 1; |
575 | } | 575 | } |
576 | 576 | ||
577 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 577 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
578 | 578 | ||
579 | /* had we spliced it onto indirect block? */ | 579 | /* had we spliced it onto indirect block? */ |
580 | if (where->bh) | 580 | if (where->bh) |
581 | mark_buffer_dirty_inode(where->bh, inode); | 581 | mark_buffer_dirty_inode(where->bh, inode); |
582 | 582 | ||
583 | inode->i_ctime = CURRENT_TIME_SEC; | 583 | inode->i_ctime = CURRENT_TIME_SEC; |
584 | mark_inode_dirty(inode); | 584 | mark_inode_dirty(inode); |
585 | } | 585 | } |
586 | 586 | ||
587 | /* | 587 | /* |
588 | * Allocation strategy is simple: if we have to allocate something, we will | 588 | * Allocation strategy is simple: if we have to allocate something, we will |
589 | * have to go the whole way to leaf. So let's do it before attaching anything | 589 | * have to go the whole way to leaf. So let's do it before attaching anything |
590 | * to tree, set linkage between the newborn blocks, write them if sync is | 590 | * to tree, set linkage between the newborn blocks, write them if sync is |
591 | * required, recheck the path, free and repeat if check fails, otherwise | 591 | * required, recheck the path, free and repeat if check fails, otherwise |
592 | * set the last missing link (that will protect us from any truncate-generated | 592 | * set the last missing link (that will protect us from any truncate-generated |
593 | * removals - all blocks on the path are immune now) and possibly force the | 593 | * removals - all blocks on the path are immune now) and possibly force the |
594 | * write on the parent block. | 594 | * write on the parent block. |
595 | * That has a nice additional property: no special recovery from the failed | 595 | * That has a nice additional property: no special recovery from the failed |
596 | * allocations is needed - we simply release blocks and do not touch anything | 596 | * allocations is needed - we simply release blocks and do not touch anything |
597 | * reachable from inode. | 597 | * reachable from inode. |
598 | * | 598 | * |
599 | * `handle' can be NULL if create == 0. | 599 | * `handle' can be NULL if create == 0. |
600 | * | 600 | * |
601 | * return > 0, # of blocks mapped or allocated. | 601 | * return > 0, # of blocks mapped or allocated. |
602 | * return = 0, if plain lookup failed. | 602 | * return = 0, if plain lookup failed. |
603 | * return < 0, error case. | 603 | * return < 0, error case. |
604 | */ | 604 | */ |
605 | static int ext2_get_blocks(struct inode *inode, | 605 | static int ext2_get_blocks(struct inode *inode, |
606 | sector_t iblock, unsigned long maxblocks, | 606 | sector_t iblock, unsigned long maxblocks, |
607 | struct buffer_head *bh_result, | 607 | struct buffer_head *bh_result, |
608 | int create) | 608 | int create) |
609 | { | 609 | { |
610 | int err = -EIO; | 610 | int err = -EIO; |
611 | int offsets[4]; | 611 | int offsets[4]; |
612 | Indirect chain[4]; | 612 | Indirect chain[4]; |
613 | Indirect *partial; | 613 | Indirect *partial; |
614 | ext2_fsblk_t goal; | 614 | ext2_fsblk_t goal; |
615 | int indirect_blks; | 615 | int indirect_blks; |
616 | int blocks_to_boundary = 0; | 616 | int blocks_to_boundary = 0; |
617 | int depth; | 617 | int depth; |
618 | struct ext2_inode_info *ei = EXT2_I(inode); | 618 | struct ext2_inode_info *ei = EXT2_I(inode); |
619 | int count = 0; | 619 | int count = 0; |
620 | ext2_fsblk_t first_block = 0; | 620 | ext2_fsblk_t first_block = 0; |
621 | 621 | ||
622 | depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); | 622 | depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); |
623 | 623 | ||
624 | if (depth == 0) | 624 | if (depth == 0) |
625 | return (err); | 625 | return (err); |
626 | 626 | ||
627 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); | 627 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); |
628 | /* Simplest case - block found, no allocation needed */ | 628 | /* Simplest case - block found, no allocation needed */ |
629 | if (!partial) { | 629 | if (!partial) { |
630 | first_block = le32_to_cpu(chain[depth - 1].key); | 630 | first_block = le32_to_cpu(chain[depth - 1].key); |
631 | clear_buffer_new(bh_result); /* What's this do? */ | 631 | clear_buffer_new(bh_result); /* What's this do? */ |
632 | count++; | 632 | count++; |
633 | /*map more blocks*/ | 633 | /*map more blocks*/ |
634 | while (count < maxblocks && count <= blocks_to_boundary) { | 634 | while (count < maxblocks && count <= blocks_to_boundary) { |
635 | ext2_fsblk_t blk; | 635 | ext2_fsblk_t blk; |
636 | 636 | ||
637 | if (!verify_chain(chain, chain + depth - 1)) { | 637 | if (!verify_chain(chain, chain + depth - 1)) { |
638 | /* | 638 | /* |
639 | * Indirect block might be removed by | 639 | * Indirect block might be removed by |
640 | * truncate while we were reading it. | 640 | * truncate while we were reading it. |
641 | * Handling of that case: forget what we've | 641 | * Handling of that case: forget what we've |
642 | * got now, go to reread. | 642 | * got now, go to reread. |
643 | */ | 643 | */ |
644 | err = -EAGAIN; | 644 | err = -EAGAIN; |
645 | count = 0; | 645 | count = 0; |
646 | break; | 646 | break; |
647 | } | 647 | } |
648 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 648 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
649 | if (blk == first_block + count) | 649 | if (blk == first_block + count) |
650 | count++; | 650 | count++; |
651 | else | 651 | else |
652 | break; | 652 | break; |
653 | } | 653 | } |
654 | if (err != -EAGAIN) | 654 | if (err != -EAGAIN) |
655 | goto got_it; | 655 | goto got_it; |
656 | } | 656 | } |
657 | 657 | ||
658 | /* Next simple case - plain lookup or failed read of indirect block */ | 658 | /* Next simple case - plain lookup or failed read of indirect block */ |
659 | if (!create || err == -EIO) | 659 | if (!create || err == -EIO) |
660 | goto cleanup; | 660 | goto cleanup; |
661 | 661 | ||
662 | mutex_lock(&ei->truncate_mutex); | 662 | mutex_lock(&ei->truncate_mutex); |
663 | /* | 663 | /* |
664 | * If the indirect block is missing while we are reading | 664 | * If the indirect block is missing while we are reading |
665 | * the chain(ext2_get_branch() returns -EAGAIN err), or | 665 | * the chain(ext2_get_branch() returns -EAGAIN err), or |
666 | * if the chain has been changed after we grab the semaphore, | 666 | * if the chain has been changed after we grab the semaphore, |
667 | * (either because another process truncated this branch, or | 667 | * (either because another process truncated this branch, or |
668 | * another get_block allocated this branch) re-grab the chain to see if | 668 | * another get_block allocated this branch) re-grab the chain to see if |
669 | * the request block has been allocated or not. | 669 | * the request block has been allocated or not. |
670 | * | 670 | * |
671 | * Since we already block the truncate/other get_block | 671 | * Since we already block the truncate/other get_block |
672 | * at this point, we will have the current copy of the chain when we | 672 | * at this point, we will have the current copy of the chain when we |
673 | * splice the branch into the tree. | 673 | * splice the branch into the tree. |
674 | */ | 674 | */ |
675 | if (err == -EAGAIN || !verify_chain(chain, partial)) { | 675 | if (err == -EAGAIN || !verify_chain(chain, partial)) { |
676 | while (partial > chain) { | 676 | while (partial > chain) { |
677 | brelse(partial->bh); | 677 | brelse(partial->bh); |
678 | partial--; | 678 | partial--; |
679 | } | 679 | } |
680 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); | 680 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); |
681 | if (!partial) { | 681 | if (!partial) { |
682 | count++; | 682 | count++; |
683 | mutex_unlock(&ei->truncate_mutex); | 683 | mutex_unlock(&ei->truncate_mutex); |
684 | if (err) | 684 | if (err) |
685 | goto cleanup; | 685 | goto cleanup; |
686 | clear_buffer_new(bh_result); | 686 | clear_buffer_new(bh_result); |
687 | goto got_it; | 687 | goto got_it; |
688 | } | 688 | } |
689 | } | 689 | } |
690 | 690 | ||
691 | /* | 691 | /* |
692 | * Okay, we need to do block allocation. Lazily initialize the block | 692 | * Okay, we need to do block allocation. Lazily initialize the block |
693 | * allocation info here if necessary | 693 | * allocation info here if necessary |
694 | */ | 694 | */ |
695 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | 695 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) |
696 | ext2_init_block_alloc_info(inode); | 696 | ext2_init_block_alloc_info(inode); |
697 | 697 | ||
698 | goal = ext2_find_goal(inode, iblock, partial); | 698 | goal = ext2_find_goal(inode, iblock, partial); |
699 | 699 | ||
700 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 700 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
701 | indirect_blks = (chain + depth) - partial - 1; | 701 | indirect_blks = (chain + depth) - partial - 1; |
702 | /* | 702 | /* |
703 | * Next look up the indirect map to count the totoal number of | 703 | * Next look up the indirect map to count the totoal number of |
704 | * direct blocks to allocate for this branch. | 704 | * direct blocks to allocate for this branch. |
705 | */ | 705 | */ |
706 | count = ext2_blks_to_allocate(partial, indirect_blks, | 706 | count = ext2_blks_to_allocate(partial, indirect_blks, |
707 | maxblocks, blocks_to_boundary); | 707 | maxblocks, blocks_to_boundary); |
708 | /* | 708 | /* |
709 | * XXX ???? Block out ext2_truncate while we alter the tree | 709 | * XXX ???? Block out ext2_truncate while we alter the tree |
710 | */ | 710 | */ |
711 | err = ext2_alloc_branch(inode, indirect_blks, &count, goal, | 711 | err = ext2_alloc_branch(inode, indirect_blks, &count, goal, |
712 | offsets + (partial - chain), partial); | 712 | offsets + (partial - chain), partial); |
713 | 713 | ||
714 | if (err) { | 714 | if (err) { |
715 | mutex_unlock(&ei->truncate_mutex); | 715 | mutex_unlock(&ei->truncate_mutex); |
716 | goto cleanup; | 716 | goto cleanup; |
717 | } | 717 | } |
718 | 718 | ||
719 | if (ext2_use_xip(inode->i_sb)) { | 719 | if (ext2_use_xip(inode->i_sb)) { |
720 | /* | 720 | /* |
721 | * we need to clear the block | 721 | * we need to clear the block |
722 | */ | 722 | */ |
723 | err = ext2_clear_xip_target (inode, | 723 | err = ext2_clear_xip_target (inode, |
724 | le32_to_cpu(chain[depth-1].key)); | 724 | le32_to_cpu(chain[depth-1].key)); |
725 | if (err) { | 725 | if (err) { |
726 | mutex_unlock(&ei->truncate_mutex); | 726 | mutex_unlock(&ei->truncate_mutex); |
727 | goto cleanup; | 727 | goto cleanup; |
728 | } | 728 | } |
729 | } | 729 | } |
730 | 730 | ||
731 | ext2_splice_branch(inode, iblock, partial, indirect_blks, count); | 731 | ext2_splice_branch(inode, iblock, partial, indirect_blks, count); |
732 | mutex_unlock(&ei->truncate_mutex); | 732 | mutex_unlock(&ei->truncate_mutex); |
733 | set_buffer_new(bh_result); | 733 | set_buffer_new(bh_result); |
734 | got_it: | 734 | got_it: |
735 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 735 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); |
736 | if (count > blocks_to_boundary) | 736 | if (count > blocks_to_boundary) |
737 | set_buffer_boundary(bh_result); | 737 | set_buffer_boundary(bh_result); |
738 | err = count; | 738 | err = count; |
739 | /* Clean up and exit */ | 739 | /* Clean up and exit */ |
740 | partial = chain + depth - 1; /* the whole chain */ | 740 | partial = chain + depth - 1; /* the whole chain */ |
741 | cleanup: | 741 | cleanup: |
742 | while (partial > chain) { | 742 | while (partial > chain) { |
743 | brelse(partial->bh); | 743 | brelse(partial->bh); |
744 | partial--; | 744 | partial--; |
745 | } | 745 | } |
746 | return err; | 746 | return err; |
747 | } | 747 | } |
748 | 748 | ||
749 | int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) | 749 | int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) |
750 | { | 750 | { |
751 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 751 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
752 | int ret = ext2_get_blocks(inode, iblock, max_blocks, | 752 | int ret = ext2_get_blocks(inode, iblock, max_blocks, |
753 | bh_result, create); | 753 | bh_result, create); |
754 | if (ret > 0) { | 754 | if (ret > 0) { |
755 | bh_result->b_size = (ret << inode->i_blkbits); | 755 | bh_result->b_size = (ret << inode->i_blkbits); |
756 | ret = 0; | 756 | ret = 0; |
757 | } | 757 | } |
758 | return ret; | 758 | return ret; |
759 | 759 | ||
760 | } | 760 | } |
761 | 761 | ||
/*
 * FIEMAP ioctl backend: report extent mappings to userspace via the
 * generic block-based helper, using ext2_get_block for lookups.
 */
int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	return generic_block_fiemap(inode, fieinfo, start, len,
				    ext2_get_block);
}
768 | 768 | ||
/* ->writepage: write out one dirty page using the buffer_head helper. */
static int ext2_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, ext2_get_block, wbc);
}
773 | 773 | ||
/* ->readpage: read a single page through the mpage (bio-based) helper. */
static int ext2_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, ext2_get_block);
}
778 | 778 | ||
/* ->readpages: readahead a batch of pages through the mpage helper. */
static int
ext2_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
}
785 | 785 | ||
786 | static int | 786 | static int |
787 | ext2_write_begin(struct file *file, struct address_space *mapping, | 787 | ext2_write_begin(struct file *file, struct address_space *mapping, |
788 | loff_t pos, unsigned len, unsigned flags, | 788 | loff_t pos, unsigned len, unsigned flags, |
789 | struct page **pagep, void **fsdata) | 789 | struct page **pagep, void **fsdata) |
790 | { | 790 | { |
791 | int ret; | 791 | int ret; |
792 | 792 | ||
793 | ret = block_write_begin(mapping, pos, len, flags, pagep, | 793 | ret = block_write_begin(mapping, pos, len, flags, pagep, |
794 | ext2_get_block); | 794 | ext2_get_block); |
795 | if (ret < 0) | 795 | if (ret < 0) |
796 | ext2_write_failed(mapping, pos + len); | 796 | ext2_write_failed(mapping, pos + len); |
797 | return ret; | 797 | return ret; |
798 | } | 798 | } |
799 | 799 | ||
800 | static int ext2_write_end(struct file *file, struct address_space *mapping, | 800 | static int ext2_write_end(struct file *file, struct address_space *mapping, |
801 | loff_t pos, unsigned len, unsigned copied, | 801 | loff_t pos, unsigned len, unsigned copied, |
802 | struct page *page, void *fsdata) | 802 | struct page *page, void *fsdata) |
803 | { | 803 | { |
804 | int ret; | 804 | int ret; |
805 | 805 | ||
806 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 806 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
807 | if (ret < len) | 807 | if (ret < len) |
808 | ext2_write_failed(mapping, pos + len); | 808 | ext2_write_failed(mapping, pos + len); |
809 | return ret; | 809 | return ret; |
810 | } | 810 | } |
811 | 811 | ||
812 | static int | 812 | static int |
813 | ext2_nobh_write_begin(struct file *file, struct address_space *mapping, | 813 | ext2_nobh_write_begin(struct file *file, struct address_space *mapping, |
814 | loff_t pos, unsigned len, unsigned flags, | 814 | loff_t pos, unsigned len, unsigned flags, |
815 | struct page **pagep, void **fsdata) | 815 | struct page **pagep, void **fsdata) |
816 | { | 816 | { |
817 | int ret; | 817 | int ret; |
818 | 818 | ||
819 | ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, | 819 | ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, |
820 | ext2_get_block); | 820 | ext2_get_block); |
821 | if (ret < 0) | 821 | if (ret < 0) |
822 | ext2_write_failed(mapping, pos + len); | 822 | ext2_write_failed(mapping, pos + len); |
823 | return ret; | 823 | return ret; |
824 | } | 824 | } |
825 | 825 | ||
/* ->writepage for the nobh flavour: avoids attaching buffer_heads. */
static int ext2_nobh_writepage(struct page *page,
			struct writeback_control *wbc)
{
	return nobh_writepage(page, ext2_get_block, wbc);
}
831 | 831 | ||
/* ->bmap: translate a file block number to an on-disk block number. */
static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping,block,ext2_get_block);
}
836 | 836 | ||
837 | static ssize_t | 837 | static ssize_t |
838 | ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 838 | ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, |
839 | loff_t offset, unsigned long nr_segs) | 839 | loff_t offset, unsigned long nr_segs) |
840 | { | 840 | { |
841 | struct file *file = iocb->ki_filp; | 841 | struct file *file = iocb->ki_filp; |
842 | struct address_space *mapping = file->f_mapping; | 842 | struct address_space *mapping = file->f_mapping; |
843 | struct inode *inode = mapping->host; | 843 | struct inode *inode = mapping->host; |
844 | ssize_t ret; | 844 | ssize_t ret; |
845 | 845 | ||
846 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, | 846 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, |
847 | iov, offset, nr_segs, ext2_get_block, NULL); | 847 | iov, offset, nr_segs, ext2_get_block, NULL); |
848 | if (ret < 0 && (rw & WRITE)) | 848 | if (ret < 0 && (rw & WRITE)) |
849 | ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); | 849 | ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); |
850 | return ret; | 850 | return ret; |
851 | } | 851 | } |
852 | 852 | ||
/* ->writepages: write back a range of dirty pages via the mpage helper. */
static int
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, ext2_get_block);
}
858 | 858 | ||
/* Address space operations for the default (buffer_head based) mode. */
const struct address_space_operations ext2_aops = {
	.readpage		= ext2_readpage,
	.readpages		= ext2_readpages,
	.writepage		= ext2_writepage,
	.write_begin		= ext2_write_begin,
	.write_end		= ext2_write_end,
	.bmap			= ext2_bmap,
	.direct_IO		= ext2_direct_IO,
	.writepages		= ext2_writepages,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
872 | 872 | ||
/* Address space operations when the filesystem is mounted with XIP
 * (execute-in-place): pages come straight from the backing store. */
const struct address_space_operations ext2_aops_xip = {
	.bmap			= ext2_bmap,
	.get_xip_mem		= ext2_get_xip_mem,
};
877 | 877 | ||
/* Address space operations for the "nobh" mount option: data pages are
 * handled without attaching buffer_heads where possible. */
const struct address_space_operations ext2_nobh_aops = {
	.readpage		= ext2_readpage,
	.readpages		= ext2_readpages,
	.writepage		= ext2_nobh_writepage,
	.write_begin		= ext2_nobh_write_begin,
	.write_end		= nobh_write_end,
	.bmap			= ext2_bmap,
	.direct_IO		= ext2_direct_IO,
	.writepages		= ext2_writepages,
	.migratepage		= buffer_migrate_page,
	.error_remove_page	= generic_error_remove_page,
};
890 | 890 | ||
891 | /* | 891 | /* |
892 | * Probably it should be a library function... search for first non-zero word | 892 | * Probably it should be a library function... search for first non-zero word |
893 | * or memcmp with zero_page, whatever is better for particular architecture. | 893 | * or memcmp with zero_page, whatever is better for particular architecture. |
894 | * Linus? | 894 | * Linus? |
895 | */ | 895 | */ |
896 | static inline int all_zeroes(__le32 *p, __le32 *q) | 896 | static inline int all_zeroes(__le32 *p, __le32 *q) |
897 | { | 897 | { |
898 | while (p < q) | 898 | while (p < q) |
899 | if (*p++) | 899 | if (*p++) |
900 | return 0; | 900 | return 0; |
901 | return 1; | 901 | return 1; |
902 | } | 902 | } |
903 | 903 | ||
904 | /** | 904 | /** |
905 | * ext2_find_shared - find the indirect blocks for partial truncation. | 905 | * ext2_find_shared - find the indirect blocks for partial truncation. |
906 | * @inode: inode in question | 906 | * @inode: inode in question |
907 | * @depth: depth of the affected branch | 907 | * @depth: depth of the affected branch |
908 | * @offsets: offsets of pointers in that branch (see ext2_block_to_path) | 908 | * @offsets: offsets of pointers in that branch (see ext2_block_to_path) |
909 | * @chain: place to store the pointers to partial indirect blocks | 909 | * @chain: place to store the pointers to partial indirect blocks |
910 | * @top: place to the (detached) top of branch | 910 | * @top: place to the (detached) top of branch |
911 | * | 911 | * |
912 | * This is a helper function used by ext2_truncate(). | 912 | * This is a helper function used by ext2_truncate(). |
913 | * | 913 | * |
914 | * When we do truncate() we may have to clean the ends of several indirect | 914 | * When we do truncate() we may have to clean the ends of several indirect |
915 | * blocks but leave the blocks themselves alive. Block is partially | 915 | * blocks but leave the blocks themselves alive. Block is partially |
916 | * truncated if some data below the new i_size is referred from it (and | 916 | * truncated if some data below the new i_size is referred from it (and |
917 | * it is on the path to the first completely truncated data block, indeed). | 917 | * it is on the path to the first completely truncated data block, indeed). |
918 | * We have to free the top of that path along with everything to the right | 918 | * We have to free the top of that path along with everything to the right |
919 | * of the path. Since no allocation past the truncation point is possible | 919 | * of the path. Since no allocation past the truncation point is possible |
920 | * until ext2_truncate() finishes, we may safely do the latter, but top | 920 | * until ext2_truncate() finishes, we may safely do the latter, but top |
921 | * of branch may require special attention - pageout below the truncation | 921 | * of branch may require special attention - pageout below the truncation |
922 | * point might try to populate it. | 922 | * point might try to populate it. |
923 | * | 923 | * |
924 | * We atomically detach the top of branch from the tree, store the block | 924 | * We atomically detach the top of branch from the tree, store the block |
925 | * number of its root in *@top, pointers to buffer_heads of partially | 925 | * number of its root in *@top, pointers to buffer_heads of partially |
926 | * truncated blocks - in @chain[].bh and pointers to their last elements | 926 | * truncated blocks - in @chain[].bh and pointers to their last elements |
927 | * that should not be removed - in @chain[].p. Return value is the pointer | 927 | * that should not be removed - in @chain[].p. Return value is the pointer |
928 | * to last filled element of @chain. | 928 | * to last filled element of @chain. |
929 | * | 929 | * |
930 | * The work left to caller to do the actual freeing of subtrees: | 930 | * The work left to caller to do the actual freeing of subtrees: |
931 | * a) free the subtree starting from *@top | 931 | * a) free the subtree starting from *@top |
932 | * b) free the subtrees whose roots are stored in | 932 | * b) free the subtrees whose roots are stored in |
933 | * (@chain[i].p+1 .. end of @chain[i].bh->b_data) | 933 | * (@chain[i].p+1 .. end of @chain[i].bh->b_data) |
934 | * c) free the subtrees growing from the inode past the @chain[0].p | 934 | * c) free the subtrees growing from the inode past the @chain[0].p |
935 | * (no partially truncated stuff there). | 935 | * (no partially truncated stuff there). |
936 | */ | 936 | */ |
937 | 937 | ||
static Indirect *ext2_find_shared(struct inode *inode,
				int depth,
				int offsets[4],
				Indirect chain[4],
				__le32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	/* Drop trailing zero offsets: the truncation point sits at the
	 * start of those levels, so they need no partial handling. */
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = ext2_get_branch(inode, k, offsets, chain, &err);
	/* A fully present branch: treat its deepest level as "partial". */
	if (!partial)
		partial = chain + k-1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	write_lock(&EXT2_I(inode)->i_meta_lock);
	if (!partial->key && *partial->p) {
		write_unlock(&EXT2_I(inode)->i_meta_lock);
		goto no_top;
	}
	/* Walk upward past levels whose surviving prefix is all zeroes;
	 * those whole indirect blocks can be freed. */
	for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of branch is all ours and does not grow immediately from the inode
	 * it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		/* Detach the subtree root and hand it to the caller. */
		*top = *p->p;
		*p->p = 0;
	}
	write_unlock(&EXT2_I(inode)->i_meta_lock);

	/* Release buffer_heads for the levels below the detach point. */
	while(partial > p)
	{
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}
986 | 986 | ||
987 | /** | 987 | /** |
988 | * ext2_free_data - free a list of data blocks | 988 | * ext2_free_data - free a list of data blocks |
989 | * @inode: inode we are dealing with | 989 | * @inode: inode we are dealing with |
990 | * @p: array of block numbers | 990 | * @p: array of block numbers |
991 | * @q: points immediately past the end of array | 991 | * @q: points immediately past the end of array |
992 | * | 992 | * |
993 | * We are freeing all blocks referred from that array (numbers are | 993 | * We are freeing all blocks referred from that array (numbers are |
994 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 994 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
995 | * appropriately. | 995 | * appropriately. |
996 | */ | 996 | */ |
997 | static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) | 997 | static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) |
998 | { | 998 | { |
999 | unsigned long block_to_free = 0, count = 0; | 999 | unsigned long block_to_free = 0, count = 0; |
1000 | unsigned long nr; | 1000 | unsigned long nr; |
1001 | 1001 | ||
1002 | for ( ; p < q ; p++) { | 1002 | for ( ; p < q ; p++) { |
1003 | nr = le32_to_cpu(*p); | 1003 | nr = le32_to_cpu(*p); |
1004 | if (nr) { | 1004 | if (nr) { |
1005 | *p = 0; | 1005 | *p = 0; |
1006 | /* accumulate blocks to free if they're contiguous */ | 1006 | /* accumulate blocks to free if they're contiguous */ |
1007 | if (count == 0) | 1007 | if (count == 0) |
1008 | goto free_this; | 1008 | goto free_this; |
1009 | else if (block_to_free == nr - count) | 1009 | else if (block_to_free == nr - count) |
1010 | count++; | 1010 | count++; |
1011 | else { | 1011 | else { |
1012 | ext2_free_blocks (inode, block_to_free, count); | 1012 | ext2_free_blocks (inode, block_to_free, count); |
1013 | mark_inode_dirty(inode); | 1013 | mark_inode_dirty(inode); |
1014 | free_this: | 1014 | free_this: |
1015 | block_to_free = nr; | 1015 | block_to_free = nr; |
1016 | count = 1; | 1016 | count = 1; |
1017 | } | 1017 | } |
1018 | } | 1018 | } |
1019 | } | 1019 | } |
1020 | if (count > 0) { | 1020 | if (count > 0) { |
1021 | ext2_free_blocks (inode, block_to_free, count); | 1021 | ext2_free_blocks (inode, block_to_free, count); |
1022 | mark_inode_dirty(inode); | 1022 | mark_inode_dirty(inode); |
1023 | } | 1023 | } |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | /** | 1026 | /** |
1027 | * ext2_free_branches - free an array of branches | 1027 | * ext2_free_branches - free an array of branches |
1028 | * @inode: inode we are dealing with | 1028 | * @inode: inode we are dealing with |
1029 | * @p: array of block numbers | 1029 | * @p: array of block numbers |
1030 | * @q: pointer immediately past the end of array | 1030 | * @q: pointer immediately past the end of array |
1031 | * @depth: depth of the branches to free | 1031 | * @depth: depth of the branches to free |
1032 | * | 1032 | * |
1033 | * We are freeing all blocks referred from these branches (numbers are | 1033 | * We are freeing all blocks referred from these branches (numbers are |
1034 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 1034 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
1035 | * appropriately. | 1035 | * appropriately. |
1036 | */ | 1036 | */ |
1037 | static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth) | 1037 | static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int depth) |
1038 | { | 1038 | { |
1039 | struct buffer_head * bh; | 1039 | struct buffer_head * bh; |
1040 | unsigned long nr; | 1040 | unsigned long nr; |
1041 | 1041 | ||
1042 | if (depth--) { | 1042 | if (depth--) { |
1043 | int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); | 1043 | int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); |
1044 | for ( ; p < q ; p++) { | 1044 | for ( ; p < q ; p++) { |
1045 | nr = le32_to_cpu(*p); | 1045 | nr = le32_to_cpu(*p); |
1046 | if (!nr) | 1046 | if (!nr) |
1047 | continue; | 1047 | continue; |
1048 | *p = 0; | 1048 | *p = 0; |
1049 | bh = sb_bread(inode->i_sb, nr); | 1049 | bh = sb_bread(inode->i_sb, nr); |
1050 | /* | 1050 | /* |
1051 | * A read failure? Report error and clear slot | 1051 | * A read failure? Report error and clear slot |
1052 | * (should be rare). | 1052 | * (should be rare). |
1053 | */ | 1053 | */ |
1054 | if (!bh) { | 1054 | if (!bh) { |
1055 | ext2_error(inode->i_sb, "ext2_free_branches", | 1055 | ext2_error(inode->i_sb, "ext2_free_branches", |
1056 | "Read failure, inode=%ld, block=%ld", | 1056 | "Read failure, inode=%ld, block=%ld", |
1057 | inode->i_ino, nr); | 1057 | inode->i_ino, nr); |
1058 | continue; | 1058 | continue; |
1059 | } | 1059 | } |
1060 | ext2_free_branches(inode, | 1060 | ext2_free_branches(inode, |
1061 | (__le32*)bh->b_data, | 1061 | (__le32*)bh->b_data, |
1062 | (__le32*)bh->b_data + addr_per_block, | 1062 | (__le32*)bh->b_data + addr_per_block, |
1063 | depth); | 1063 | depth); |
1064 | bforget(bh); | 1064 | bforget(bh); |
1065 | ext2_free_blocks(inode, nr, 1); | 1065 | ext2_free_blocks(inode, nr, 1); |
1066 | mark_inode_dirty(inode); | 1066 | mark_inode_dirty(inode); |
1067 | } | 1067 | } |
1068 | } else | 1068 | } else |
1069 | ext2_free_data(inode, p, q); | 1069 | ext2_free_data(inode, p, q); |
1070 | } | 1070 | } |
1071 | 1071 | ||
1072 | static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) | 1072 | static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) |
1073 | { | 1073 | { |
1074 | __le32 *i_data = EXT2_I(inode)->i_data; | 1074 | __le32 *i_data = EXT2_I(inode)->i_data; |
1075 | struct ext2_inode_info *ei = EXT2_I(inode); | 1075 | struct ext2_inode_info *ei = EXT2_I(inode); |
1076 | int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); | 1076 | int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); |
1077 | int offsets[4]; | 1077 | int offsets[4]; |
1078 | Indirect chain[4]; | 1078 | Indirect chain[4]; |
1079 | Indirect *partial; | 1079 | Indirect *partial; |
1080 | __le32 nr = 0; | 1080 | __le32 nr = 0; |
1081 | int n; | 1081 | int n; |
1082 | long iblock; | 1082 | long iblock; |
1083 | unsigned blocksize; | 1083 | unsigned blocksize; |
1084 | blocksize = inode->i_sb->s_blocksize; | 1084 | blocksize = inode->i_sb->s_blocksize; |
1085 | iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); | 1085 | iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); |
1086 | 1086 | ||
1087 | n = ext2_block_to_path(inode, iblock, offsets, NULL); | 1087 | n = ext2_block_to_path(inode, iblock, offsets, NULL); |
1088 | if (n == 0) | 1088 | if (n == 0) |
1089 | return; | 1089 | return; |
1090 | 1090 | ||
1091 | /* | 1091 | /* |
1092 | * From here we block out all ext2_get_block() callers who want to | 1092 | * From here we block out all ext2_get_block() callers who want to |
1093 | * modify the block allocation tree. | 1093 | * modify the block allocation tree. |
1094 | */ | 1094 | */ |
1095 | mutex_lock(&ei->truncate_mutex); | 1095 | mutex_lock(&ei->truncate_mutex); |
1096 | 1096 | ||
1097 | if (n == 1) { | 1097 | if (n == 1) { |
1098 | ext2_free_data(inode, i_data+offsets[0], | 1098 | ext2_free_data(inode, i_data+offsets[0], |
1099 | i_data + EXT2_NDIR_BLOCKS); | 1099 | i_data + EXT2_NDIR_BLOCKS); |
1100 | goto do_indirects; | 1100 | goto do_indirects; |
1101 | } | 1101 | } |
1102 | 1102 | ||
1103 | partial = ext2_find_shared(inode, n, offsets, chain, &nr); | 1103 | partial = ext2_find_shared(inode, n, offsets, chain, &nr); |
1104 | /* Kill the top of shared branch (already detached) */ | 1104 | /* Kill the top of shared branch (already detached) */ |
1105 | if (nr) { | 1105 | if (nr) { |
1106 | if (partial == chain) | 1106 | if (partial == chain) |
1107 | mark_inode_dirty(inode); | 1107 | mark_inode_dirty(inode); |
1108 | else | 1108 | else |
1109 | mark_buffer_dirty_inode(partial->bh, inode); | 1109 | mark_buffer_dirty_inode(partial->bh, inode); |
1110 | ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); | 1110 | ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); |
1111 | } | 1111 | } |
1112 | /* Clear the ends of indirect blocks on the shared branch */ | 1112 | /* Clear the ends of indirect blocks on the shared branch */ |
1113 | while (partial > chain) { | 1113 | while (partial > chain) { |
1114 | ext2_free_branches(inode, | 1114 | ext2_free_branches(inode, |
1115 | partial->p + 1, | 1115 | partial->p + 1, |
1116 | (__le32*)partial->bh->b_data+addr_per_block, | 1116 | (__le32*)partial->bh->b_data+addr_per_block, |
1117 | (chain+n-1) - partial); | 1117 | (chain+n-1) - partial); |
1118 | mark_buffer_dirty_inode(partial->bh, inode); | 1118 | mark_buffer_dirty_inode(partial->bh, inode); |
1119 | brelse (partial->bh); | 1119 | brelse (partial->bh); |
1120 | partial--; | 1120 | partial--; |
1121 | } | 1121 | } |
1122 | do_indirects: | 1122 | do_indirects: |
1123 | /* Kill the remaining (whole) subtrees */ | 1123 | /* Kill the remaining (whole) subtrees */ |
1124 | switch (offsets[0]) { | 1124 | switch (offsets[0]) { |
1125 | default: | 1125 | default: |
1126 | nr = i_data[EXT2_IND_BLOCK]; | 1126 | nr = i_data[EXT2_IND_BLOCK]; |
1127 | if (nr) { | 1127 | if (nr) { |
1128 | i_data[EXT2_IND_BLOCK] = 0; | 1128 | i_data[EXT2_IND_BLOCK] = 0; |
1129 | mark_inode_dirty(inode); | 1129 | mark_inode_dirty(inode); |
1130 | ext2_free_branches(inode, &nr, &nr+1, 1); | 1130 | ext2_free_branches(inode, &nr, &nr+1, 1); |
1131 | } | 1131 | } |
1132 | case EXT2_IND_BLOCK: | 1132 | case EXT2_IND_BLOCK: |
1133 | nr = i_data[EXT2_DIND_BLOCK]; | 1133 | nr = i_data[EXT2_DIND_BLOCK]; |
1134 | if (nr) { | 1134 | if (nr) { |
1135 | i_data[EXT2_DIND_BLOCK] = 0; | 1135 | i_data[EXT2_DIND_BLOCK] = 0; |
1136 | mark_inode_dirty(inode); | 1136 | mark_inode_dirty(inode); |
1137 | ext2_free_branches(inode, &nr, &nr+1, 2); | 1137 | ext2_free_branches(inode, &nr, &nr+1, 2); |
1138 | } | 1138 | } |
1139 | case EXT2_DIND_BLOCK: | 1139 | case EXT2_DIND_BLOCK: |
1140 | nr = i_data[EXT2_TIND_BLOCK]; | 1140 | nr = i_data[EXT2_TIND_BLOCK]; |
1141 | if (nr) { | 1141 | if (nr) { |
1142 | i_data[EXT2_TIND_BLOCK] = 0; | 1142 | i_data[EXT2_TIND_BLOCK] = 0; |
1143 | mark_inode_dirty(inode); | 1143 | mark_inode_dirty(inode); |
1144 | ext2_free_branches(inode, &nr, &nr+1, 3); | 1144 | ext2_free_branches(inode, &nr, &nr+1, 3); |
1145 | } | 1145 | } |
1146 | case EXT2_TIND_BLOCK: | 1146 | case EXT2_TIND_BLOCK: |
1147 | ; | 1147 | ; |
1148 | } | 1148 | } |
1149 | 1149 | ||
1150 | ext2_discard_reservation(inode); | 1150 | ext2_discard_reservation(inode); |
1151 | 1151 | ||
1152 | mutex_unlock(&ei->truncate_mutex); | 1152 | mutex_unlock(&ei->truncate_mutex); |
1153 | } | 1153 | } |
1154 | 1154 | ||
1155 | static void ext2_truncate_blocks(struct inode *inode, loff_t offset) | 1155 | static void ext2_truncate_blocks(struct inode *inode, loff_t offset) |
1156 | { | 1156 | { |
1157 | /* | 1157 | /* |
1158 | * XXX: it seems like a bug here that we don't allow | 1158 | * XXX: it seems like a bug here that we don't allow |
1159 | * IS_APPEND inode to have blocks-past-i_size trimmed off. | 1159 | * IS_APPEND inode to have blocks-past-i_size trimmed off. |
1160 | * review and fix this. | 1160 | * review and fix this. |
1161 | * | 1161 | * |
1162 | * Also would be nice to be able to handle IO errors and such, | 1162 | * Also would be nice to be able to handle IO errors and such, |
1163 | * but that's probably too much to ask. | 1163 | * but that's probably too much to ask. |
1164 | */ | 1164 | */ |
1165 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1165 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1166 | S_ISLNK(inode->i_mode))) | 1166 | S_ISLNK(inode->i_mode))) |
1167 | return; | 1167 | return; |
1168 | if (ext2_inode_is_fast_symlink(inode)) | 1168 | if (ext2_inode_is_fast_symlink(inode)) |
1169 | return; | 1169 | return; |
1170 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 1170 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
1171 | return; | 1171 | return; |
1172 | __ext2_truncate_blocks(inode, offset); | 1172 | __ext2_truncate_blocks(inode, offset); |
1173 | } | 1173 | } |
1174 | 1174 | ||
1175 | static int ext2_setsize(struct inode *inode, loff_t newsize) | 1175 | static int ext2_setsize(struct inode *inode, loff_t newsize) |
1176 | { | 1176 | { |
1177 | int error; | 1177 | int error; |
1178 | 1178 | ||
1179 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1179 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1180 | S_ISLNK(inode->i_mode))) | 1180 | S_ISLNK(inode->i_mode))) |
1181 | return -EINVAL; | 1181 | return -EINVAL; |
1182 | if (ext2_inode_is_fast_symlink(inode)) | 1182 | if (ext2_inode_is_fast_symlink(inode)) |
1183 | return -EINVAL; | 1183 | return -EINVAL; |
1184 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 1184 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
1185 | return -EPERM; | 1185 | return -EPERM; |
1186 | 1186 | ||
1187 | inode_dio_wait(inode); | ||
1188 | |||
1187 | if (mapping_is_xip(inode->i_mapping)) | 1189 | if (mapping_is_xip(inode->i_mapping)) |
1188 | error = xip_truncate_page(inode->i_mapping, newsize); | 1190 | error = xip_truncate_page(inode->i_mapping, newsize); |
1189 | else if (test_opt(inode->i_sb, NOBH)) | 1191 | else if (test_opt(inode->i_sb, NOBH)) |
1190 | error = nobh_truncate_page(inode->i_mapping, | 1192 | error = nobh_truncate_page(inode->i_mapping, |
1191 | newsize, ext2_get_block); | 1193 | newsize, ext2_get_block); |
1192 | else | 1194 | else |
1193 | error = block_truncate_page(inode->i_mapping, | 1195 | error = block_truncate_page(inode->i_mapping, |
1194 | newsize, ext2_get_block); | 1196 | newsize, ext2_get_block); |
1195 | if (error) | 1197 | if (error) |
1196 | return error; | 1198 | return error; |
1197 | 1199 | ||
1198 | truncate_setsize(inode, newsize); | 1200 | truncate_setsize(inode, newsize); |
1199 | __ext2_truncate_blocks(inode, newsize); | 1201 | __ext2_truncate_blocks(inode, newsize); |
1200 | 1202 | ||
1201 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 1203 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
1202 | if (inode_needs_sync(inode)) { | 1204 | if (inode_needs_sync(inode)) { |
1203 | sync_mapping_buffers(inode->i_mapping); | 1205 | sync_mapping_buffers(inode->i_mapping); |
1204 | sync_inode_metadata(inode, 1); | 1206 | sync_inode_metadata(inode, 1); |
1205 | } else { | 1207 | } else { |
1206 | mark_inode_dirty(inode); | 1208 | mark_inode_dirty(inode); |
1207 | } | 1209 | } |
1208 | 1210 | ||
1209 | return 0; | 1211 | return 0; |
1210 | } | 1212 | } |
1211 | 1213 | ||
1212 | static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, | 1214 | static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, |
1213 | struct buffer_head **p) | 1215 | struct buffer_head **p) |
1214 | { | 1216 | { |
1215 | struct buffer_head * bh; | 1217 | struct buffer_head * bh; |
1216 | unsigned long block_group; | 1218 | unsigned long block_group; |
1217 | unsigned long block; | 1219 | unsigned long block; |
1218 | unsigned long offset; | 1220 | unsigned long offset; |
1219 | struct ext2_group_desc * gdp; | 1221 | struct ext2_group_desc * gdp; |
1220 | 1222 | ||
1221 | *p = NULL; | 1223 | *p = NULL; |
1222 | if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) || | 1224 | if ((ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb)) || |
1223 | ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) | 1225 | ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) |
1224 | goto Einval; | 1226 | goto Einval; |
1225 | 1227 | ||
1226 | block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); | 1228 | block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); |
1227 | gdp = ext2_get_group_desc(sb, block_group, NULL); | 1229 | gdp = ext2_get_group_desc(sb, block_group, NULL); |
1228 | if (!gdp) | 1230 | if (!gdp) |
1229 | goto Egdp; | 1231 | goto Egdp; |
1230 | /* | 1232 | /* |
1231 | * Figure out the offset within the block group inode table | 1233 | * Figure out the offset within the block group inode table |
1232 | */ | 1234 | */ |
1233 | offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb); | 1235 | offset = ((ino - 1) % EXT2_INODES_PER_GROUP(sb)) * EXT2_INODE_SIZE(sb); |
1234 | block = le32_to_cpu(gdp->bg_inode_table) + | 1236 | block = le32_to_cpu(gdp->bg_inode_table) + |
1235 | (offset >> EXT2_BLOCK_SIZE_BITS(sb)); | 1237 | (offset >> EXT2_BLOCK_SIZE_BITS(sb)); |
1236 | if (!(bh = sb_bread(sb, block))) | 1238 | if (!(bh = sb_bread(sb, block))) |
1237 | goto Eio; | 1239 | goto Eio; |
1238 | 1240 | ||
1239 | *p = bh; | 1241 | *p = bh; |
1240 | offset &= (EXT2_BLOCK_SIZE(sb) - 1); | 1242 | offset &= (EXT2_BLOCK_SIZE(sb) - 1); |
1241 | return (struct ext2_inode *) (bh->b_data + offset); | 1243 | return (struct ext2_inode *) (bh->b_data + offset); |
1242 | 1244 | ||
1243 | Einval: | 1245 | Einval: |
1244 | ext2_error(sb, "ext2_get_inode", "bad inode number: %lu", | 1246 | ext2_error(sb, "ext2_get_inode", "bad inode number: %lu", |
1245 | (unsigned long) ino); | 1247 | (unsigned long) ino); |
1246 | return ERR_PTR(-EINVAL); | 1248 | return ERR_PTR(-EINVAL); |
1247 | Eio: | 1249 | Eio: |
1248 | ext2_error(sb, "ext2_get_inode", | 1250 | ext2_error(sb, "ext2_get_inode", |
1249 | "unable to read inode block - inode=%lu, block=%lu", | 1251 | "unable to read inode block - inode=%lu, block=%lu", |
1250 | (unsigned long) ino, block); | 1252 | (unsigned long) ino, block); |
1251 | Egdp: | 1253 | Egdp: |
1252 | return ERR_PTR(-EIO); | 1254 | return ERR_PTR(-EIO); |
1253 | } | 1255 | } |
1254 | 1256 | ||
1255 | void ext2_set_inode_flags(struct inode *inode) | 1257 | void ext2_set_inode_flags(struct inode *inode) |
1256 | { | 1258 | { |
1257 | unsigned int flags = EXT2_I(inode)->i_flags; | 1259 | unsigned int flags = EXT2_I(inode)->i_flags; |
1258 | 1260 | ||
1259 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | 1261 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); |
1260 | if (flags & EXT2_SYNC_FL) | 1262 | if (flags & EXT2_SYNC_FL) |
1261 | inode->i_flags |= S_SYNC; | 1263 | inode->i_flags |= S_SYNC; |
1262 | if (flags & EXT2_APPEND_FL) | 1264 | if (flags & EXT2_APPEND_FL) |
1263 | inode->i_flags |= S_APPEND; | 1265 | inode->i_flags |= S_APPEND; |
1264 | if (flags & EXT2_IMMUTABLE_FL) | 1266 | if (flags & EXT2_IMMUTABLE_FL) |
1265 | inode->i_flags |= S_IMMUTABLE; | 1267 | inode->i_flags |= S_IMMUTABLE; |
1266 | if (flags & EXT2_NOATIME_FL) | 1268 | if (flags & EXT2_NOATIME_FL) |
1267 | inode->i_flags |= S_NOATIME; | 1269 | inode->i_flags |= S_NOATIME; |
1268 | if (flags & EXT2_DIRSYNC_FL) | 1270 | if (flags & EXT2_DIRSYNC_FL) |
1269 | inode->i_flags |= S_DIRSYNC; | 1271 | inode->i_flags |= S_DIRSYNC; |
1270 | } | 1272 | } |
1271 | 1273 | ||
1272 | /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ | 1274 | /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ |
1273 | void ext2_get_inode_flags(struct ext2_inode_info *ei) | 1275 | void ext2_get_inode_flags(struct ext2_inode_info *ei) |
1274 | { | 1276 | { |
1275 | unsigned int flags = ei->vfs_inode.i_flags; | 1277 | unsigned int flags = ei->vfs_inode.i_flags; |
1276 | 1278 | ||
1277 | ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| | 1279 | ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| |
1278 | EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); | 1280 | EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); |
1279 | if (flags & S_SYNC) | 1281 | if (flags & S_SYNC) |
1280 | ei->i_flags |= EXT2_SYNC_FL; | 1282 | ei->i_flags |= EXT2_SYNC_FL; |
1281 | if (flags & S_APPEND) | 1283 | if (flags & S_APPEND) |
1282 | ei->i_flags |= EXT2_APPEND_FL; | 1284 | ei->i_flags |= EXT2_APPEND_FL; |
1283 | if (flags & S_IMMUTABLE) | 1285 | if (flags & S_IMMUTABLE) |
1284 | ei->i_flags |= EXT2_IMMUTABLE_FL; | 1286 | ei->i_flags |= EXT2_IMMUTABLE_FL; |
1285 | if (flags & S_NOATIME) | 1287 | if (flags & S_NOATIME) |
1286 | ei->i_flags |= EXT2_NOATIME_FL; | 1288 | ei->i_flags |= EXT2_NOATIME_FL; |
1287 | if (flags & S_DIRSYNC) | 1289 | if (flags & S_DIRSYNC) |
1288 | ei->i_flags |= EXT2_DIRSYNC_FL; | 1290 | ei->i_flags |= EXT2_DIRSYNC_FL; |
1289 | } | 1291 | } |
1290 | 1292 | ||
1291 | struct inode *ext2_iget (struct super_block *sb, unsigned long ino) | 1293 | struct inode *ext2_iget (struct super_block *sb, unsigned long ino) |
1292 | { | 1294 | { |
1293 | struct ext2_inode_info *ei; | 1295 | struct ext2_inode_info *ei; |
1294 | struct buffer_head * bh; | 1296 | struct buffer_head * bh; |
1295 | struct ext2_inode *raw_inode; | 1297 | struct ext2_inode *raw_inode; |
1296 | struct inode *inode; | 1298 | struct inode *inode; |
1297 | long ret = -EIO; | 1299 | long ret = -EIO; |
1298 | int n; | 1300 | int n; |
1299 | 1301 | ||
1300 | inode = iget_locked(sb, ino); | 1302 | inode = iget_locked(sb, ino); |
1301 | if (!inode) | 1303 | if (!inode) |
1302 | return ERR_PTR(-ENOMEM); | 1304 | return ERR_PTR(-ENOMEM); |
1303 | if (!(inode->i_state & I_NEW)) | 1305 | if (!(inode->i_state & I_NEW)) |
1304 | return inode; | 1306 | return inode; |
1305 | 1307 | ||
1306 | ei = EXT2_I(inode); | 1308 | ei = EXT2_I(inode); |
1307 | ei->i_block_alloc_info = NULL; | 1309 | ei->i_block_alloc_info = NULL; |
1308 | 1310 | ||
1309 | raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); | 1311 | raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); |
1310 | if (IS_ERR(raw_inode)) { | 1312 | if (IS_ERR(raw_inode)) { |
1311 | ret = PTR_ERR(raw_inode); | 1313 | ret = PTR_ERR(raw_inode); |
1312 | goto bad_inode; | 1314 | goto bad_inode; |
1313 | } | 1315 | } |
1314 | 1316 | ||
1315 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 1317 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
1316 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 1318 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
1317 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 1319 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
1318 | if (!(test_opt (inode->i_sb, NO_UID32))) { | 1320 | if (!(test_opt (inode->i_sb, NO_UID32))) { |
1319 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 1321 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
1320 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 1322 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
1321 | } | 1323 | } |
1322 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 1324 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
1323 | inode->i_size = le32_to_cpu(raw_inode->i_size); | 1325 | inode->i_size = le32_to_cpu(raw_inode->i_size); |
1324 | inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); | 1326 | inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); |
1325 | inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); | 1327 | inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); |
1326 | inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); | 1328 | inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); |
1327 | inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; | 1329 | inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; |
1328 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 1330 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
1329 | /* We now have enough fields to check if the inode was active or not. | 1331 | /* We now have enough fields to check if the inode was active or not. |
1330 | * This is needed because nfsd might try to access dead inodes | 1332 | * This is needed because nfsd might try to access dead inodes |
1331 | * the test is that same one that e2fsck uses | 1333 | * the test is that same one that e2fsck uses |
1332 | * NeilBrown 1999oct15 | 1334 | * NeilBrown 1999oct15 |
1333 | */ | 1335 | */ |
1334 | if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { | 1336 | if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { |
1335 | /* this inode is deleted */ | 1337 | /* this inode is deleted */ |
1336 | brelse (bh); | 1338 | brelse (bh); |
1337 | ret = -ESTALE; | 1339 | ret = -ESTALE; |
1338 | goto bad_inode; | 1340 | goto bad_inode; |
1339 | } | 1341 | } |
1340 | inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); | 1342 | inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); |
1341 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); | 1343 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); |
1342 | ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); | 1344 | ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); |
1343 | ei->i_frag_no = raw_inode->i_frag; | 1345 | ei->i_frag_no = raw_inode->i_frag; |
1344 | ei->i_frag_size = raw_inode->i_fsize; | 1346 | ei->i_frag_size = raw_inode->i_fsize; |
1345 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); | 1347 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); |
1346 | ei->i_dir_acl = 0; | 1348 | ei->i_dir_acl = 0; |
1347 | if (S_ISREG(inode->i_mode)) | 1349 | if (S_ISREG(inode->i_mode)) |
1348 | inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; | 1350 | inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; |
1349 | else | 1351 | else |
1350 | ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); | 1352 | ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); |
1351 | ei->i_dtime = 0; | 1353 | ei->i_dtime = 0; |
1352 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 1354 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
1353 | ei->i_state = 0; | 1355 | ei->i_state = 0; |
1354 | ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); | 1356 | ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); |
1355 | ei->i_dir_start_lookup = 0; | 1357 | ei->i_dir_start_lookup = 0; |
1356 | 1358 | ||
1357 | /* | 1359 | /* |
1358 | * NOTE! The in-memory inode i_data array is in little-endian order | 1360 | * NOTE! The in-memory inode i_data array is in little-endian order |
1359 | * even on big-endian machines: we do NOT byteswap the block numbers! | 1361 | * even on big-endian machines: we do NOT byteswap the block numbers! |
1360 | */ | 1362 | */ |
1361 | for (n = 0; n < EXT2_N_BLOCKS; n++) | 1363 | for (n = 0; n < EXT2_N_BLOCKS; n++) |
1362 | ei->i_data[n] = raw_inode->i_block[n]; | 1364 | ei->i_data[n] = raw_inode->i_block[n]; |
1363 | 1365 | ||
1364 | if (S_ISREG(inode->i_mode)) { | 1366 | if (S_ISREG(inode->i_mode)) { |
1365 | inode->i_op = &ext2_file_inode_operations; | 1367 | inode->i_op = &ext2_file_inode_operations; |
1366 | if (ext2_use_xip(inode->i_sb)) { | 1368 | if (ext2_use_xip(inode->i_sb)) { |
1367 | inode->i_mapping->a_ops = &ext2_aops_xip; | 1369 | inode->i_mapping->a_ops = &ext2_aops_xip; |
1368 | inode->i_fop = &ext2_xip_file_operations; | 1370 | inode->i_fop = &ext2_xip_file_operations; |
1369 | } else if (test_opt(inode->i_sb, NOBH)) { | 1371 | } else if (test_opt(inode->i_sb, NOBH)) { |
1370 | inode->i_mapping->a_ops = &ext2_nobh_aops; | 1372 | inode->i_mapping->a_ops = &ext2_nobh_aops; |
1371 | inode->i_fop = &ext2_file_operations; | 1373 | inode->i_fop = &ext2_file_operations; |
1372 | } else { | 1374 | } else { |
1373 | inode->i_mapping->a_ops = &ext2_aops; | 1375 | inode->i_mapping->a_ops = &ext2_aops; |
1374 | inode->i_fop = &ext2_file_operations; | 1376 | inode->i_fop = &ext2_file_operations; |
1375 | } | 1377 | } |
1376 | } else if (S_ISDIR(inode->i_mode)) { | 1378 | } else if (S_ISDIR(inode->i_mode)) { |
1377 | inode->i_op = &ext2_dir_inode_operations; | 1379 | inode->i_op = &ext2_dir_inode_operations; |
1378 | inode->i_fop = &ext2_dir_operations; | 1380 | inode->i_fop = &ext2_dir_operations; |
1379 | if (test_opt(inode->i_sb, NOBH)) | 1381 | if (test_opt(inode->i_sb, NOBH)) |
1380 | inode->i_mapping->a_ops = &ext2_nobh_aops; | 1382 | inode->i_mapping->a_ops = &ext2_nobh_aops; |
1381 | else | 1383 | else |
1382 | inode->i_mapping->a_ops = &ext2_aops; | 1384 | inode->i_mapping->a_ops = &ext2_aops; |
1383 | } else if (S_ISLNK(inode->i_mode)) { | 1385 | } else if (S_ISLNK(inode->i_mode)) { |
1384 | if (ext2_inode_is_fast_symlink(inode)) { | 1386 | if (ext2_inode_is_fast_symlink(inode)) { |
1385 | inode->i_op = &ext2_fast_symlink_inode_operations; | 1387 | inode->i_op = &ext2_fast_symlink_inode_operations; |
1386 | nd_terminate_link(ei->i_data, inode->i_size, | 1388 | nd_terminate_link(ei->i_data, inode->i_size, |
1387 | sizeof(ei->i_data) - 1); | 1389 | sizeof(ei->i_data) - 1); |
1388 | } else { | 1390 | } else { |
1389 | inode->i_op = &ext2_symlink_inode_operations; | 1391 | inode->i_op = &ext2_symlink_inode_operations; |
1390 | if (test_opt(inode->i_sb, NOBH)) | 1392 | if (test_opt(inode->i_sb, NOBH)) |
1391 | inode->i_mapping->a_ops = &ext2_nobh_aops; | 1393 | inode->i_mapping->a_ops = &ext2_nobh_aops; |
1392 | else | 1394 | else |
1393 | inode->i_mapping->a_ops = &ext2_aops; | 1395 | inode->i_mapping->a_ops = &ext2_aops; |
1394 | } | 1396 | } |
1395 | } else { | 1397 | } else { |
1396 | inode->i_op = &ext2_special_inode_operations; | 1398 | inode->i_op = &ext2_special_inode_operations; |
1397 | if (raw_inode->i_block[0]) | 1399 | if (raw_inode->i_block[0]) |
1398 | init_special_inode(inode, inode->i_mode, | 1400 | init_special_inode(inode, inode->i_mode, |
1399 | old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); | 1401 | old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); |
1400 | else | 1402 | else |
1401 | init_special_inode(inode, inode->i_mode, | 1403 | init_special_inode(inode, inode->i_mode, |
1402 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 1404 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
1403 | } | 1405 | } |
1404 | brelse (bh); | 1406 | brelse (bh); |
1405 | ext2_set_inode_flags(inode); | 1407 | ext2_set_inode_flags(inode); |
1406 | unlock_new_inode(inode); | 1408 | unlock_new_inode(inode); |
1407 | return inode; | 1409 | return inode; |
1408 | 1410 | ||
1409 | bad_inode: | 1411 | bad_inode: |
1410 | iget_failed(inode); | 1412 | iget_failed(inode); |
1411 | return ERR_PTR(ret); | 1413 | return ERR_PTR(ret); |
1412 | } | 1414 | } |
1413 | 1415 | ||
1414 | static int __ext2_write_inode(struct inode *inode, int do_sync) | 1416 | static int __ext2_write_inode(struct inode *inode, int do_sync) |
1415 | { | 1417 | { |
1416 | struct ext2_inode_info *ei = EXT2_I(inode); | 1418 | struct ext2_inode_info *ei = EXT2_I(inode); |
1417 | struct super_block *sb = inode->i_sb; | 1419 | struct super_block *sb = inode->i_sb; |
1418 | ino_t ino = inode->i_ino; | 1420 | ino_t ino = inode->i_ino; |
1419 | uid_t uid = inode->i_uid; | 1421 | uid_t uid = inode->i_uid; |
1420 | gid_t gid = inode->i_gid; | 1422 | gid_t gid = inode->i_gid; |
1421 | struct buffer_head * bh; | 1423 | struct buffer_head * bh; |
1422 | struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); | 1424 | struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); |
1423 | int n; | 1425 | int n; |
1424 | int err = 0; | 1426 | int err = 0; |
1425 | 1427 | ||
1426 | if (IS_ERR(raw_inode)) | 1428 | if (IS_ERR(raw_inode)) |
1427 | return -EIO; | 1429 | return -EIO; |
1428 | 1430 | ||
1429 | /* For fields not not tracking in the in-memory inode, | 1431 | /* For fields not not tracking in the in-memory inode, |
1430 | * initialise them to zero for new inodes. */ | 1432 | * initialise them to zero for new inodes. */ |
1431 | if (ei->i_state & EXT2_STATE_NEW) | 1433 | if (ei->i_state & EXT2_STATE_NEW) |
1432 | memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); | 1434 | memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); |
1433 | 1435 | ||
1434 | ext2_get_inode_flags(ei); | 1436 | ext2_get_inode_flags(ei); |
1435 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 1437 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
1436 | if (!(test_opt(sb, NO_UID32))) { | 1438 | if (!(test_opt(sb, NO_UID32))) { |
1437 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); | 1439 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); |
1438 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); | 1440 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); |
1439 | /* | 1441 | /* |
1440 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 1442 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
1441 | * re-used with the upper 16 bits of the uid/gid intact | 1443 | * re-used with the upper 16 bits of the uid/gid intact |
1442 | */ | 1444 | */ |
1443 | if (!ei->i_dtime) { | 1445 | if (!ei->i_dtime) { |
1444 | raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid)); | 1446 | raw_inode->i_uid_high = cpu_to_le16(high_16_bits(uid)); |
1445 | raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid)); | 1447 | raw_inode->i_gid_high = cpu_to_le16(high_16_bits(gid)); |
1446 | } else { | 1448 | } else { |
1447 | raw_inode->i_uid_high = 0; | 1449 | raw_inode->i_uid_high = 0; |
1448 | raw_inode->i_gid_high = 0; | 1450 | raw_inode->i_gid_high = 0; |
1449 | } | 1451 | } |
1450 | } else { | 1452 | } else { |
1451 | raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid)); | 1453 | raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(uid)); |
1452 | raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid)); | 1454 | raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(gid)); |
1453 | raw_inode->i_uid_high = 0; | 1455 | raw_inode->i_uid_high = 0; |
1454 | raw_inode->i_gid_high = 0; | 1456 | raw_inode->i_gid_high = 0; |
1455 | } | 1457 | } |
1456 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | 1458 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
1457 | raw_inode->i_size = cpu_to_le32(inode->i_size); | 1459 | raw_inode->i_size = cpu_to_le32(inode->i_size); |
1458 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | 1460 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); |
1459 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | 1461 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); |
1460 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | 1462 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); |
1461 | 1463 | ||
1462 | raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); | 1464 | raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); |
1463 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 1465 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
1464 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | 1466 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
1465 | raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); | 1467 | raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); |
1466 | raw_inode->i_frag = ei->i_frag_no; | 1468 | raw_inode->i_frag = ei->i_frag_no; |
1467 | raw_inode->i_fsize = ei->i_frag_size; | 1469 | raw_inode->i_fsize = ei->i_frag_size; |
1468 | raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); | 1470 | raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); |
1469 | if (!S_ISREG(inode->i_mode)) | 1471 | if (!S_ISREG(inode->i_mode)) |
1470 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); | 1472 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); |
1471 | else { | 1473 | else { |
1472 | raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); | 1474 | raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); |
1473 | if (inode->i_size > 0x7fffffffULL) { | 1475 | if (inode->i_size > 0x7fffffffULL) { |
1474 | if (!EXT2_HAS_RO_COMPAT_FEATURE(sb, | 1476 | if (!EXT2_HAS_RO_COMPAT_FEATURE(sb, |
1475 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE) || | 1477 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE) || |
1476 | EXT2_SB(sb)->s_es->s_rev_level == | 1478 | EXT2_SB(sb)->s_es->s_rev_level == |
1477 | cpu_to_le32(EXT2_GOOD_OLD_REV)) { | 1479 | cpu_to_le32(EXT2_GOOD_OLD_REV)) { |
1478 | /* If this is the first large file | 1480 | /* If this is the first large file |
1479 | * created, add a flag to the superblock. | 1481 | * created, add a flag to the superblock. |
1480 | */ | 1482 | */ |
1481 | spin_lock(&EXT2_SB(sb)->s_lock); | 1483 | spin_lock(&EXT2_SB(sb)->s_lock); |
1482 | ext2_update_dynamic_rev(sb); | 1484 | ext2_update_dynamic_rev(sb); |
1483 | EXT2_SET_RO_COMPAT_FEATURE(sb, | 1485 | EXT2_SET_RO_COMPAT_FEATURE(sb, |
1484 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE); | 1486 | EXT2_FEATURE_RO_COMPAT_LARGE_FILE); |
1485 | spin_unlock(&EXT2_SB(sb)->s_lock); | 1487 | spin_unlock(&EXT2_SB(sb)->s_lock); |
1486 | ext2_write_super(sb); | 1488 | ext2_write_super(sb); |
1487 | } | 1489 | } |
1488 | } | 1490 | } |
1489 | } | 1491 | } |
1490 | 1492 | ||
1491 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 1493 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
1492 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 1494 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
1493 | if (old_valid_dev(inode->i_rdev)) { | 1495 | if (old_valid_dev(inode->i_rdev)) { |
1494 | raw_inode->i_block[0] = | 1496 | raw_inode->i_block[0] = |
1495 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | 1497 | cpu_to_le32(old_encode_dev(inode->i_rdev)); |
1496 | raw_inode->i_block[1] = 0; | 1498 | raw_inode->i_block[1] = 0; |
1497 | } else { | 1499 | } else { |
1498 | raw_inode->i_block[0] = 0; | 1500 | raw_inode->i_block[0] = 0; |
1499 | raw_inode->i_block[1] = | 1501 | raw_inode->i_block[1] = |
1500 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | 1502 | cpu_to_le32(new_encode_dev(inode->i_rdev)); |
1501 | raw_inode->i_block[2] = 0; | 1503 | raw_inode->i_block[2] = 0; |
1502 | } | 1504 | } |
1503 | } else for (n = 0; n < EXT2_N_BLOCKS; n++) | 1505 | } else for (n = 0; n < EXT2_N_BLOCKS; n++) |
1504 | raw_inode->i_block[n] = ei->i_data[n]; | 1506 | raw_inode->i_block[n] = ei->i_data[n]; |
1505 | mark_buffer_dirty(bh); | 1507 | mark_buffer_dirty(bh); |
1506 | if (do_sync) { | 1508 | if (do_sync) { |
1507 | sync_dirty_buffer(bh); | 1509 | sync_dirty_buffer(bh); |
1508 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 1510 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
1509 | printk ("IO error syncing ext2 inode [%s:%08lx]\n", | 1511 | printk ("IO error syncing ext2 inode [%s:%08lx]\n", |
1510 | sb->s_id, (unsigned long) ino); | 1512 | sb->s_id, (unsigned long) ino); |
1511 | err = -EIO; | 1513 | err = -EIO; |
1512 | } | 1514 | } |
1513 | } | 1515 | } |
1514 | ei->i_state &= ~EXT2_STATE_NEW; | 1516 | ei->i_state &= ~EXT2_STATE_NEW; |
1515 | brelse (bh); | 1517 | brelse (bh); |
1516 | return err; | 1518 | return err; |
1517 | } | 1519 | } |
1518 | 1520 | ||
1519 | int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) | 1521 | int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) |
1520 | { | 1522 | { |
1521 | return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); | 1523 | return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); |
1522 | } | 1524 | } |
1523 | 1525 | ||
/*
 * ext2 ->setattr: validate and apply attribute changes (chown/chmod,
 * truncate, timestamps) to the inode behind @dentry.
 *
 * The ordering below matters: generic permission checks first, then the
 * quota transfer for owner changes, then the size change, and only then
 * copying the remaining attributes into the in-core inode.
 *
 * Returns 0 on success or a negative errno.
 */
int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	/* Generic VFS validity/permission checks for the requested change. */
	error = inode_change_ok(inode, iattr);
	if (error)
		return error;

	if (is_quota_modification(inode, iattr))
		dquot_initialize(inode);
	/* Owner changes must move the quota charges to the new uid/gid. */
	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
		error = dquot_transfer(inode, iattr);
		if (error)
			return error;
	}
	/* Size changes go through ext2_setsize (on-disk truncate/extend). */
	if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
		error = ext2_setsize(inode, iattr->ia_size);
		if (error)
			return error;
	}
	setattr_copy(inode, iattr);
	/* A mode change may require the ACL to be rewritten to match. */
	if (iattr->ia_valid & ATTR_MODE)
		error = ext2_acl_chmod(inode);
	mark_inode_dirty(inode);

	return error;
}
1553 | 1555 |
fs/ext3/inode.c
1 | /* | 1 | /* |
2 | * linux/fs/ext3/inode.c | 2 | * linux/fs/ext3/inode.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1993, 1994, 1995 | 4 | * Copyright (C) 1992, 1993, 1994, 1995 |
5 | * Remy Card (card@masi.ibp.fr) | 5 | * Remy Card (card@masi.ibp.fr) |
6 | * Laboratoire MASI - Institut Blaise Pascal | 6 | * Laboratoire MASI - Institut Blaise Pascal |
7 | * Universite Pierre et Marie Curie (Paris VI) | 7 | * Universite Pierre et Marie Curie (Paris VI) |
8 | * | 8 | * |
9 | * from | 9 | * from |
10 | * | 10 | * |
11 | * linux/fs/minix/inode.c | 11 | * linux/fs/minix/inode.c |
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * Goal-directed block allocation by Stephen Tweedie | 15 | * Goal-directed block allocation by Stephen Tweedie |
16 | * (sct@redhat.com), 1993, 1998 | 16 | * (sct@redhat.com), 1993, 1998 |
17 | * Big-endian to little-endian byte-swapping/bitmaps by | 17 | * Big-endian to little-endian byte-swapping/bitmaps by |
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | 18 | * David S. Miller (davem@caip.rutgers.edu), 1995 |
19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | 19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
20 | * (jj@sunsite.ms.mff.cuni.cz) | 20 | * (jj@sunsite.ms.mff.cuni.cz) |
21 | * | 21 | * |
22 | * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 | 22 | * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/time.h> | 27 | #include <linux/time.h> |
28 | #include <linux/ext3_jbd.h> | 28 | #include <linux/ext3_jbd.h> |
29 | #include <linux/jbd.h> | 29 | #include <linux/jbd.h> |
30 | #include <linux/highuid.h> | 30 | #include <linux/highuid.h> |
31 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
32 | #include <linux/quotaops.h> | 32 | #include <linux/quotaops.h> |
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/buffer_head.h> | 34 | #include <linux/buffer_head.h> |
35 | #include <linux/writeback.h> | 35 | #include <linux/writeback.h> |
36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | 39 | #include <linux/fiemap.h> |
40 | #include <linux/namei.h> | 40 | #include <linux/namei.h> |
41 | #include "xattr.h" | 41 | #include "xattr.h" |
42 | #include "acl.h" | 42 | #include "acl.h" |
43 | 43 | ||
44 | static int ext3_writepage_trans_blocks(struct inode *inode); | 44 | static int ext3_writepage_trans_blocks(struct inode *inode); |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * Test whether an inode is a fast symlink. | 47 | * Test whether an inode is a fast symlink. |
48 | */ | 48 | */ |
49 | static int ext3_inode_is_fast_symlink(struct inode *inode) | 49 | static int ext3_inode_is_fast_symlink(struct inode *inode) |
50 | { | 50 | { |
51 | int ea_blocks = EXT3_I(inode)->i_file_acl ? | 51 | int ea_blocks = EXT3_I(inode)->i_file_acl ? |
52 | (inode->i_sb->s_blocksize >> 9) : 0; | 52 | (inode->i_sb->s_blocksize >> 9) : 0; |
53 | 53 | ||
54 | return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); | 54 | return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); |
55 | } | 55 | } |
56 | 56 | ||
57 | /* | 57 | /* |
58 | * The ext3 forget function must perform a revoke if we are freeing data | 58 | * The ext3 forget function must perform a revoke if we are freeing data |
59 | * which has been journaled. Metadata (eg. indirect blocks) must be | 59 | * which has been journaled. Metadata (eg. indirect blocks) must be |
60 | * revoked in all cases. | 60 | * revoked in all cases. |
61 | * | 61 | * |
62 | * "bh" may be NULL: a metadata block may have been freed from memory | 62 | * "bh" may be NULL: a metadata block may have been freed from memory |
63 | * but there may still be a record of it in the journal, and that record | 63 | * but there may still be a record of it in the journal, and that record |
64 | * still needs to be revoked. | 64 | * still needs to be revoked. |
65 | */ | 65 | */ |
/*
 * Forget @bh/@blocknr within the transaction @handle.  @is_metadata says
 * whether the block held metadata, which always requires a revoke record
 * (see the comment above).  Returns 0 or a journal error code.
 */
int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
		struct buffer_head *bh, ext3_fsblk_t blocknr)
{
	int err;

	might_sleep();

	BUFFER_TRACE(bh, "enter");

	jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
		  "data mode %lx\n",
		  bh, is_metadata, inode->i_mode,
		  test_opt(inode->i_sb, DATA_FLAGS));

	/* Never use the revoke function if we are doing full data
	 * journaling: there is no need to, and a V1 superblock won't
	 * support it.  Otherwise, only skip the revoke on un-journaled
	 * data blocks. */

	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ||
	    (!is_metadata && !ext3_should_journal_data(inode))) {
		if (bh) {
			BUFFER_TRACE(bh, "call journal_forget");
			return ext3_journal_forget(handle, bh);
		}
		/* No buffer in memory and no revoke needed: nothing to do. */
		return 0;
	}

	/*
	 * data!=journal && (is_metadata || should_journal_data(inode))
	 */
	BUFFER_TRACE(bh, "call ext3_journal_revoke");
	err = ext3_journal_revoke(handle, blocknr, bh);
	if (err)
		ext3_abort(inode->i_sb, __func__,
			   "error %d when attempting revoke", err);
	BUFFER_TRACE(bh, "exit");
	return err;
}
105 | 105 | ||
106 | /* | 106 | /* |
107 | * Work out how many blocks we need to proceed with the next chunk of a | 107 | * Work out how many blocks we need to proceed with the next chunk of a |
108 | * truncate transaction. | 108 | * truncate transaction. |
109 | */ | 109 | */ |
110 | static unsigned long blocks_for_truncate(struct inode *inode) | 110 | static unsigned long blocks_for_truncate(struct inode *inode) |
111 | { | 111 | { |
112 | unsigned long needed; | 112 | unsigned long needed; |
113 | 113 | ||
114 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); | 114 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); |
115 | 115 | ||
116 | /* Give ourselves just enough room to cope with inodes in which | 116 | /* Give ourselves just enough room to cope with inodes in which |
117 | * i_blocks is corrupt: we've seen disk corruptions in the past | 117 | * i_blocks is corrupt: we've seen disk corruptions in the past |
118 | * which resulted in random data in an inode which looked enough | 118 | * which resulted in random data in an inode which looked enough |
119 | * like a regular file for ext3 to try to delete it. Things | 119 | * like a regular file for ext3 to try to delete it. Things |
120 | * will go a bit crazy if that happens, but at least we should | 120 | * will go a bit crazy if that happens, but at least we should |
121 | * try not to panic the whole kernel. */ | 121 | * try not to panic the whole kernel. */ |
122 | if (needed < 2) | 122 | if (needed < 2) |
123 | needed = 2; | 123 | needed = 2; |
124 | 124 | ||
125 | /* But we need to bound the transaction so we don't overflow the | 125 | /* But we need to bound the transaction so we don't overflow the |
126 | * journal. */ | 126 | * journal. */ |
127 | if (needed > EXT3_MAX_TRANS_DATA) | 127 | if (needed > EXT3_MAX_TRANS_DATA) |
128 | needed = EXT3_MAX_TRANS_DATA; | 128 | needed = EXT3_MAX_TRANS_DATA; |
129 | 129 | ||
130 | return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; | 130 | return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; |
131 | } | 131 | } |
132 | 132 | ||
133 | /* | 133 | /* |
134 | * Truncate transactions can be complex and absolutely huge. So we need to | 134 | * Truncate transactions can be complex and absolutely huge. So we need to |
 135 | * be able to restart the transaction at a convenient checkpoint to make | 135 | * be able to restart the transaction at a convenient checkpoint to make |
136 | * sure we don't overflow the journal. | 136 | * sure we don't overflow the journal. |
137 | * | 137 | * |
138 | * start_transaction gets us a new handle for a truncate transaction, | 138 | * start_transaction gets us a new handle for a truncate transaction, |
139 | * and extend_transaction tries to extend the existing one a bit. If | 139 | * and extend_transaction tries to extend the existing one a bit. If |
140 | * extend fails, we need to propagate the failure up and restart the | 140 | * extend fails, we need to propagate the failure up and restart the |
141 | * transaction in the top-level truncate loop. --sct | 141 | * transaction in the top-level truncate loop. --sct |
142 | */ | 142 | */ |
143 | static handle_t *start_transaction(struct inode *inode) | 143 | static handle_t *start_transaction(struct inode *inode) |
144 | { | 144 | { |
145 | handle_t *result; | 145 | handle_t *result; |
146 | 146 | ||
147 | result = ext3_journal_start(inode, blocks_for_truncate(inode)); | 147 | result = ext3_journal_start(inode, blocks_for_truncate(inode)); |
148 | if (!IS_ERR(result)) | 148 | if (!IS_ERR(result)) |
149 | return result; | 149 | return result; |
150 | 150 | ||
151 | ext3_std_error(inode->i_sb, PTR_ERR(result)); | 151 | ext3_std_error(inode->i_sb, PTR_ERR(result)); |
152 | return result; | 152 | return result; |
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Try to extend this transaction for the purposes of truncation. | 156 | * Try to extend this transaction for the purposes of truncation. |
157 | * | 157 | * |
158 | * Returns 0 if we managed to create more room. If we can't create more | 158 | * Returns 0 if we managed to create more room. If we can't create more |
159 | * room, and the transaction must be restarted we return 1. | 159 | * room, and the transaction must be restarted we return 1. |
160 | */ | 160 | */ |
161 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | 161 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) |
162 | { | 162 | { |
163 | if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) | 163 | if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) |
164 | return 0; | 164 | return 0; |
165 | if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) | 165 | if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) |
166 | return 0; | 166 | return 0; |
167 | return 1; | 167 | return 1; |
168 | } | 168 | } |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * Restart the transaction associated with *handle. This does a commit, | 171 | * Restart the transaction associated with *handle. This does a commit, |
172 | * so before we call here everything must be consistently dirtied against | 172 | * so before we call here everything must be consistently dirtied against |
173 | * this transaction. | 173 | * this transaction. |
174 | */ | 174 | */ |
/*
 * Restart the truncate transaction behind @handle (commits the current
 * one first).  Caller holds truncate_mutex on entry and gets it back on
 * exit.  Returns the ext3_journal_restart() result.
 */
static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
{
	int ret;

	jbd_debug(2, "restarting handle %p\n", handle);
	/*
	 * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle
	 * At this moment, get_block can be called only for blocks inside
	 * i_size since page cache has been already dropped and writes are
	 * blocked by i_mutex. So we can safely drop the truncate_mutex.
	 */
	mutex_unlock(&EXT3_I(inode)->truncate_mutex);
	ret = ext3_journal_restart(handle, blocks_for_truncate(inode));
	mutex_lock(&EXT3_I(inode)->truncate_mutex);
	return ret;
}
191 | 191 | ||
/*
 * Called at inode eviction from icache.  Always drops the page cache and
 * preallocation state; when the inode is being deleted for good
 * (i_nlink == 0 and not a bad inode), additionally performs the on-disk
 * deletion — truncate, orphan removal, xattr deletion and inode freeing —
 * under a single journal handle.
 */
void ext3_evict_inode (struct inode *inode)
{
	struct ext3_block_alloc_info *rsv;
	handle_t *handle;
	int want_delete = 0;

	if (!inode->i_nlink && !is_bad_inode(inode)) {
		dquot_initialize(inode);
		want_delete = 1;
	}

	/* Page cache is dropped for every evicted inode, deleted or not. */
	truncate_inode_pages(&inode->i_data, 0);

	/* Release the block-reservation window, if one was attached. */
	ext3_discard_reservation(inode);
	rsv = EXT3_I(inode)->i_block_alloc_info;
	EXT3_I(inode)->i_block_alloc_info = NULL;
	if (unlikely(rsv))
		kfree(rsv);

	if (!want_delete)
		goto no_delete;

	handle = start_transaction(inode);
	if (IS_ERR(handle)) {
		/*
		 * If we're going to skip the normal cleanup, we still need to
		 * make sure that the in-core orphan linked list is properly
		 * cleaned up.
		 */
		ext3_orphan_del(NULL, inode);
		goto no_delete;
	}

	if (IS_SYNC(inode))
		handle->h_sync = 1;
	inode->i_size = 0;
	if (inode->i_blocks)
		ext3_truncate(inode);
	/*
	 * Kill off the orphan record which ext3_truncate created.
	 * AKPM: I think this can be inside the above `if'.
	 * Note that ext3_orphan_del() has to be able to cope with the
	 * deletion of a non-existent orphan - this is because we don't
	 * know if ext3_truncate() actually created an orphan record.
	 * (Well, we could do this if we need to, but heck - it works)
	 */
	ext3_orphan_del(handle, inode);
	EXT3_I(inode)->i_dtime = get_seconds();

	/*
	 * One subtle ordering requirement: if anything has gone wrong
	 * (transaction abort, IO errors, whatever), then we can still
	 * do these next steps (the fs will already have been marked as
	 * having errors), but we can't free the inode if the mark_dirty
	 * fails.
	 */
	if (ext3_mark_inode_dirty(handle, inode)) {
		/* If that failed, just dquot_drop() and be done with that */
		dquot_drop(inode);
		end_writeback(inode);
	} else {
		ext3_xattr_delete_inode(handle, inode);
		dquot_free_inode(inode);
		dquot_drop(inode);
		end_writeback(inode);
		ext3_free_inode(handle, inode);
	}
	ext3_journal_stop(handle);
	return;
no_delete:
	end_writeback(inode);
	dquot_drop(inode);
}
268 | 268 | ||
269 | typedef struct { | 269 | typedef struct { |
270 | __le32 *p; | 270 | __le32 *p; |
271 | __le32 key; | 271 | __le32 key; |
272 | struct buffer_head *bh; | 272 | struct buffer_head *bh; |
273 | } Indirect; | 273 | } Indirect; |
274 | 274 | ||
275 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | 275 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) |
276 | { | 276 | { |
277 | p->key = *(p->p = v); | 277 | p->key = *(p->p = v); |
278 | p->bh = bh; | 278 | p->bh = bh; |
279 | } | 279 | } |
280 | 280 | ||
281 | static int verify_chain(Indirect *from, Indirect *to) | 281 | static int verify_chain(Indirect *from, Indirect *to) |
282 | { | 282 | { |
283 | while (from <= to && from->key == *from->p) | 283 | while (from <= to && from->key == *from->p) |
284 | from++; | 284 | from++; |
285 | return (from > to); | 285 | return (from > to); |
286 | } | 286 | } |
287 | 287 | ||
288 | /** | 288 | /** |
289 | * ext3_block_to_path - parse the block number into array of offsets | 289 | * ext3_block_to_path - parse the block number into array of offsets |
290 | * @inode: inode in question (we are only interested in its superblock) | 290 | * @inode: inode in question (we are only interested in its superblock) |
291 | * @i_block: block number to be parsed | 291 | * @i_block: block number to be parsed |
292 | * @offsets: array to store the offsets in | 292 | * @offsets: array to store the offsets in |
293 | * @boundary: set this non-zero if the referred-to block is likely to be | 293 | * @boundary: set this non-zero if the referred-to block is likely to be |
294 | * followed (on disk) by an indirect block. | 294 | * followed (on disk) by an indirect block. |
295 | * | 295 | * |
296 | * To store the locations of file's data ext3 uses a data structure common | 296 | * To store the locations of file's data ext3 uses a data structure common |
297 | * for UNIX filesystems - tree of pointers anchored in the inode, with | 297 | * for UNIX filesystems - tree of pointers anchored in the inode, with |
298 | * data blocks at leaves and indirect blocks in intermediate nodes. | 298 | * data blocks at leaves and indirect blocks in intermediate nodes. |
299 | * This function translates the block number into path in that tree - | 299 | * This function translates the block number into path in that tree - |
300 | * return value is the path length and @offsets[n] is the offset of | 300 | * return value is the path length and @offsets[n] is the offset of |
301 | * pointer to (n+1)th node in the nth one. If @block is out of range | 301 | * pointer to (n+1)th node in the nth one. If @block is out of range |
302 | * (negative or too large) warning is printed and zero returned. | 302 | * (negative or too large) warning is printed and zero returned. |
303 | * | 303 | * |
304 | * Note: function doesn't find node addresses, so no IO is needed. All | 304 | * Note: function doesn't find node addresses, so no IO is needed. All |
305 | * we need to know is the capacity of indirect blocks (taken from the | 305 | * we need to know is the capacity of indirect blocks (taken from the |
306 | * inode->i_sb). | 306 | * inode->i_sb). |
307 | */ | 307 | */ |
308 | 308 | ||
309 | /* | 309 | /* |
310 | * Portability note: the last comparison (check that we fit into triple | 310 | * Portability note: the last comparison (check that we fit into triple |
311 | * indirect block) is spelled differently, because otherwise on an | 311 | * indirect block) is spelled differently, because otherwise on an |
312 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | 312 | * architecture with 32-bit longs and 8Kb pages we might get into trouble |
313 | * if our filesystem had 8Kb blocks. We might use long long, but that would | 313 | * if our filesystem had 8Kb blocks. We might use long long, but that would |
314 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | 314 | * kill us on x86. Oh, well, at least the sign propagation does not matter - |
315 | * i_block would have to be negative in the very beginning, so we would not | 315 | * i_block would have to be negative in the very beginning, so we would not |
316 | * get there at all. | 316 | * get there at all. |
317 | */ | 317 | */ |
318 | 318 | ||
/*
 * See the kernel-doc block above: translate @i_block into up to four
 * offsets through the indirect-block tree.  Note that each else-if
 * condition below subtracts the preceding region from i_block, so by the
 * time a branch is taken i_block is already relative to that subtree.
 */
static int ext3_block_to_path(struct inode *inode,
			long i_block, int offsets[4], int *boundary)
{
	int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
	int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
	const long direct_blocks = EXT3_NDIR_BLOCKS,
		indirect_blocks = ptrs,
		double_blocks = (1 << (ptrs_bits * 2));
	int n = 0;
	int final = 0;

	if (i_block < 0) {
		ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
	} else if (i_block < direct_blocks) {
		/* Direct pointer: a single offset into the inode's i_data. */
		offsets[n++] = i_block;
		final = direct_blocks;
	} else if ( (i_block -= direct_blocks) < indirect_blocks) {
		/* Single indirect. */
		offsets[n++] = EXT3_IND_BLOCK;
		offsets[n++] = i_block;
		final = ptrs;
	} else if ((i_block -= indirect_blocks) < double_blocks) {
		/* Double indirect. */
		offsets[n++] = EXT3_DIND_BLOCK;
		offsets[n++] = i_block >> ptrs_bits;
		offsets[n++] = i_block & (ptrs - 1);
		final = ptrs;
	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
		/* Triple indirect. */
		offsets[n++] = EXT3_TIND_BLOCK;
		offsets[n++] = i_block >> (ptrs_bits * 2);
		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
		offsets[n++] = i_block & (ptrs - 1);
		final = ptrs;
	} else {
		ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big");
	}
	/* Distance from the last offset to the end of its pointer block. */
	if (boundary)
		*boundary = final - 1 - (i_block & (ptrs - 1));
	return n;
}
357 | 357 | ||
358 | /** | 358 | /** |
359 | * ext3_get_branch - read the chain of indirect blocks leading to data | 359 | * ext3_get_branch - read the chain of indirect blocks leading to data |
360 | * @inode: inode in question | 360 | * @inode: inode in question |
361 | * @depth: depth of the chain (1 - direct pointer, etc.) | 361 | * @depth: depth of the chain (1 - direct pointer, etc.) |
362 | * @offsets: offsets of pointers in inode/indirect blocks | 362 | * @offsets: offsets of pointers in inode/indirect blocks |
363 | * @chain: place to store the result | 363 | * @chain: place to store the result |
364 | * @err: here we store the error value | 364 | * @err: here we store the error value |
365 | * | 365 | * |
366 | * Function fills the array of triples <key, p, bh> and returns %NULL | 366 | * Function fills the array of triples <key, p, bh> and returns %NULL |
367 | * if everything went OK or the pointer to the last filled triple | 367 | * if everything went OK or the pointer to the last filled triple |
368 | * (incomplete one) otherwise. Upon the return chain[i].key contains | 368 | * (incomplete one) otherwise. Upon the return chain[i].key contains |
369 | * the number of (i+1)-th block in the chain (as it is stored in memory, | 369 | * the number of (i+1)-th block in the chain (as it is stored in memory, |
370 | * i.e. little-endian 32-bit), chain[i].p contains the address of that | 370 | * i.e. little-endian 32-bit), chain[i].p contains the address of that |
371 | * number (it points into struct inode for i==0 and into the bh->b_data | 371 | * number (it points into struct inode for i==0 and into the bh->b_data |
372 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect | 372 | * for i>0) and chain[i].bh points to the buffer_head of i-th indirect |
373 | * block for i>0 and NULL for i==0. In other words, it holds the block | 373 | * block for i>0 and NULL for i==0. In other words, it holds the block |
374 | * numbers of the chain, addresses they were taken from (and where we can | 374 | * numbers of the chain, addresses they were taken from (and where we can |
375 | * verify that chain did not change) and buffer_heads hosting these | 375 | * verify that chain did not change) and buffer_heads hosting these |
376 | * numbers. | 376 | * numbers. |
377 | * | 377 | * |
378 | * Function stops when it stumbles upon zero pointer (absent block) | 378 | * Function stops when it stumbles upon zero pointer (absent block) |
379 | * (pointer to last triple returned, *@err == 0) | 379 | * (pointer to last triple returned, *@err == 0) |
380 | * or when it gets an IO error reading an indirect block | 380 | * or when it gets an IO error reading an indirect block |
381 | * (ditto, *@err == -EIO) | 381 | * (ditto, *@err == -EIO) |
382 | * or when it notices that chain had been changed while it was reading | 382 | * or when it notices that chain had been changed while it was reading |
383 | * (ditto, *@err == -EAGAIN) | 383 | * (ditto, *@err == -EAGAIN) |
384 | * or when it reads all @depth-1 indirect blocks successfully and finds | 384 | * or when it reads all @depth-1 indirect blocks successfully and finds |
385 | * the whole chain, all way to the data (returns %NULL, *err == 0). | 385 | * the whole chain, all way to the data (returns %NULL, *err == 0). |
386 | */ | 386 | */ |
/* See the kernel-doc block above for the full contract. */
static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
				 Indirect chain[4], int *err)
{
	struct super_block *sb = inode->i_sb;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
	if (!p->key)
		goto no_block;
	while (--depth) {
		bh = sb_bread(sb, le32_to_cpu(p->key));
		if (!bh)
			goto failure;
		/* Reader: pointers */
		if (!verify_chain(chain, p))
			goto changed;
		add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
		/* Reader: end */
		if (!p->key)
			goto no_block;
	}
	/* Whole chain read and still consistent: caller gets the data block. */
	return NULL;

changed:
	/* Chain was modified underneath us while reading: retry (-EAGAIN). */
	brelse(bh);
	*err = -EAGAIN;
	goto no_block;
failure:
	*err = -EIO;
no_block:
	return p;
}
422 | 422 | ||
423 | /** | 423 | /** |
424 | * ext3_find_near - find a place for allocation with sufficient locality | 424 | * ext3_find_near - find a place for allocation with sufficient locality |
425 | * @inode: owner | 425 | * @inode: owner |
426 | * @ind: descriptor of indirect block. | 426 | * @ind: descriptor of indirect block. |
427 | * | 427 | * |
428 | * This function returns the preferred place for block allocation. | 428 | * This function returns the preferred place for block allocation. |
429 | * It is used when heuristic for sequential allocation fails. | 429 | * It is used when heuristic for sequential allocation fails. |
430 | * Rules are: | 430 | * Rules are: |
431 | * + if there is a block to the left of our position - allocate near it. | 431 | * + if there is a block to the left of our position - allocate near it. |
432 | * + if pointer will live in indirect block - allocate near that block. | 432 | * + if pointer will live in indirect block - allocate near that block. |
433 | * + if pointer will live in inode - allocate in the same | 433 | * + if pointer will live in inode - allocate in the same |
434 | * cylinder group. | 434 | * cylinder group. |
435 | * | 435 | * |
436 | * In the latter case we colour the starting block by the callers PID to | 436 | * In the latter case we colour the starting block by the callers PID to |
437 | * prevent it from clashing with concurrent allocations for a different inode | 437 | * prevent it from clashing with concurrent allocations for a different inode |
438 | * in the same block group. The PID is used here so that functionally related | 438 | * in the same block group. The PID is used here so that functionally related |
439 | * files will be close-by on-disk. | 439 | * files will be close-by on-disk. |
440 | * | 440 | * |
441 | * Caller must make sure that @ind is valid and will stay that way. | 441 | * Caller must make sure that @ind is valid and will stay that way. |
442 | */ | 442 | */ |
443 | static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) | 443 | static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) |
444 | { | 444 | { |
445 | struct ext3_inode_info *ei = EXT3_I(inode); | 445 | struct ext3_inode_info *ei = EXT3_I(inode); |
446 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; | 446 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; |
447 | __le32 *p; | 447 | __le32 *p; |
448 | ext3_fsblk_t bg_start; | 448 | ext3_fsblk_t bg_start; |
449 | ext3_grpblk_t colour; | 449 | ext3_grpblk_t colour; |
450 | 450 | ||
451 | /* Try to find previous block */ | 451 | /* Try to find previous block */ |
452 | for (p = ind->p - 1; p >= start; p--) { | 452 | for (p = ind->p - 1; p >= start; p--) { |
453 | if (*p) | 453 | if (*p) |
454 | return le32_to_cpu(*p); | 454 | return le32_to_cpu(*p); |
455 | } | 455 | } |
456 | 456 | ||
457 | /* No such thing, so let's try location of indirect block */ | 457 | /* No such thing, so let's try location of indirect block */ |
458 | if (ind->bh) | 458 | if (ind->bh) |
459 | return ind->bh->b_blocknr; | 459 | return ind->bh->b_blocknr; |
460 | 460 | ||
461 | /* | 461 | /* |
462 | * It is going to be referred to from the inode itself? OK, just put it | 462 | * It is going to be referred to from the inode itself? OK, just put it |
463 | * into the same cylinder group then. | 463 | * into the same cylinder group then. |
464 | */ | 464 | */ |
465 | bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); | 465 | bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); |
466 | colour = (current->pid % 16) * | 466 | colour = (current->pid % 16) * |
467 | (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); | 467 | (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); |
468 | return bg_start + colour; | 468 | return bg_start + colour; |
469 | } | 469 | } |
470 | 470 | ||
471 | /** | 471 | /** |
472 | * ext3_find_goal - find a preferred place for allocation. | 472 | * ext3_find_goal - find a preferred place for allocation. |
473 | * @inode: owner | 473 | * @inode: owner |
474 | * @block: block we want | 474 | * @block: block we want |
475 | * @partial: pointer to the last triple within a chain | 475 | * @partial: pointer to the last triple within a chain |
476 | * | 476 | * |
477 | * Normally this function find the preferred place for block allocation, | 477 | * Normally this function find the preferred place for block allocation, |
478 | * returns it. | 478 | * returns it. |
479 | */ | 479 | */ |
480 | 480 | ||
481 | static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, | 481 | static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, |
482 | Indirect *partial) | 482 | Indirect *partial) |
483 | { | 483 | { |
484 | struct ext3_block_alloc_info *block_i; | 484 | struct ext3_block_alloc_info *block_i; |
485 | 485 | ||
486 | block_i = EXT3_I(inode)->i_block_alloc_info; | 486 | block_i = EXT3_I(inode)->i_block_alloc_info; |
487 | 487 | ||
488 | /* | 488 | /* |
489 | * try the heuristic for sequential allocation, | 489 | * try the heuristic for sequential allocation, |
490 | * failing that at least try to get decent locality. | 490 | * failing that at least try to get decent locality. |
491 | */ | 491 | */ |
492 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | 492 | if (block_i && (block == block_i->last_alloc_logical_block + 1) |
493 | && (block_i->last_alloc_physical_block != 0)) { | 493 | && (block_i->last_alloc_physical_block != 0)) { |
494 | return block_i->last_alloc_physical_block + 1; | 494 | return block_i->last_alloc_physical_block + 1; |
495 | } | 495 | } |
496 | 496 | ||
497 | return ext3_find_near(inode, partial); | 497 | return ext3_find_near(inode, partial); |
498 | } | 498 | } |
499 | 499 | ||
500 | /** | 500 | /** |
501 | * ext3_blks_to_allocate - Look up the block map and count the number | 501 | * ext3_blks_to_allocate - Look up the block map and count the number |
502 | * of direct blocks need to be allocated for the given branch. | 502 | * of direct blocks need to be allocated for the given branch. |
503 | * | 503 | * |
504 | * @branch: chain of indirect blocks | 504 | * @branch: chain of indirect blocks |
505 | * @k: number of blocks need for indirect blocks | 505 | * @k: number of blocks need for indirect blocks |
506 | * @blks: number of data blocks to be mapped. | 506 | * @blks: number of data blocks to be mapped. |
507 | * @blocks_to_boundary: the offset in the indirect block | 507 | * @blocks_to_boundary: the offset in the indirect block |
508 | * | 508 | * |
509 | * return the total number of blocks to be allocate, including the | 509 | * return the total number of blocks to be allocate, including the |
510 | * direct and indirect blocks. | 510 | * direct and indirect blocks. |
511 | */ | 511 | */ |
512 | static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, | 512 | static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, |
513 | int blocks_to_boundary) | 513 | int blocks_to_boundary) |
514 | { | 514 | { |
515 | unsigned long count = 0; | 515 | unsigned long count = 0; |
516 | 516 | ||
517 | /* | 517 | /* |
518 | * Simple case, [t,d]Indirect block(s) has not allocated yet | 518 | * Simple case, [t,d]Indirect block(s) has not allocated yet |
519 | * then it's clear blocks on that path have not allocated | 519 | * then it's clear blocks on that path have not allocated |
520 | */ | 520 | */ |
521 | if (k > 0) { | 521 | if (k > 0) { |
522 | /* right now we don't handle cross boundary allocation */ | 522 | /* right now we don't handle cross boundary allocation */ |
523 | if (blks < blocks_to_boundary + 1) | 523 | if (blks < blocks_to_boundary + 1) |
524 | count += blks; | 524 | count += blks; |
525 | else | 525 | else |
526 | count += blocks_to_boundary + 1; | 526 | count += blocks_to_boundary + 1; |
527 | return count; | 527 | return count; |
528 | } | 528 | } |
529 | 529 | ||
530 | count++; | 530 | count++; |
531 | while (count < blks && count <= blocks_to_boundary && | 531 | while (count < blks && count <= blocks_to_boundary && |
532 | le32_to_cpu(*(branch[0].p + count)) == 0) { | 532 | le32_to_cpu(*(branch[0].p + count)) == 0) { |
533 | count++; | 533 | count++; |
534 | } | 534 | } |
535 | return count; | 535 | return count; |
536 | } | 536 | } |
537 | 537 | ||
/**
 * ext3_alloc_blocks - multiple allocate blocks needed for a branch
 * @handle: handle for this transaction
 * @inode: owner
 * @goal: preferred place for allocation
 * @indirect_blks: the number of blocks need to allocate for indirect
 *			blocks
 * @blks: number of blocks need to allocated for direct blocks
 * @new_blocks: on return it will store the new block numbers for
 *	the indirect blocks(if needed) and the first direct block,
 * @err: here we store the error value
 *
 * return the number of direct blocks allocated
 */
static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
			ext3_fsblk_t goal, int indirect_blks, int blks,
			ext3_fsblk_t new_blocks[4], int *err)
{
	int target, i;
	unsigned long count = 0;
	int index = 0;		/* how many of new_blocks[] are filled */
	ext3_fsblk_t current_block = 0;
	int ret = 0;

	/*
	 * Here we try to allocate the requested multiple blocks at once,
	 * on a best-effort basis.
	 * To build a branch, we should allocate blocks for
	 * the indirect blocks(if not allocated yet), and at least
	 * the first direct block of this branch. That's the
	 * minimum number of blocks need to allocate(required)
	 */
	target = blks + indirect_blks;

	/*
	 * Keep asking the allocator until the indirect blocks are covered
	 * and at least one block is left over for data.  ext3_new_blocks()
	 * may return fewer than asked for (contiguous best effort), hence
	 * the loop.
	 */
	while (1) {
		count = target;
		/* allocating blocks for indirect blocks and direct blocks */
		current_block = ext3_new_blocks(handle,inode,goal,&count,err);
		if (*err)
			goto failed_out;

		target -= count;
		/* allocate blocks for indirect blocks */
		while (index < indirect_blks && count) {
			new_blocks[index++] = current_block++;
			count--;
		}

		/* anything left after indirect blocks is a direct block run */
		if (count > 0)
			break;
	}

	/* save the new block number for the first direct block */
	new_blocks[index] = current_block;

	/* total number of blocks allocated for direct blocks */
	ret = count;
	*err = 0;
	return ret;
failed_out:
	/* free any indirect blocks grabbed in earlier iterations */
	for (i = 0; i <index; i++)
		ext3_free_blocks(handle, inode, new_blocks[i], 1);
	return ret;
}
602 | 602 | ||
603 | /** | 603 | /** |
604 | * ext3_alloc_branch - allocate and set up a chain of blocks. | 604 | * ext3_alloc_branch - allocate and set up a chain of blocks. |
605 | * @handle: handle for this transaction | 605 | * @handle: handle for this transaction |
606 | * @inode: owner | 606 | * @inode: owner |
607 | * @indirect_blks: number of allocated indirect blocks | 607 | * @indirect_blks: number of allocated indirect blocks |
608 | * @blks: number of allocated direct blocks | 608 | * @blks: number of allocated direct blocks |
609 | * @goal: preferred place for allocation | 609 | * @goal: preferred place for allocation |
610 | * @offsets: offsets (in the blocks) to store the pointers to next. | 610 | * @offsets: offsets (in the blocks) to store the pointers to next. |
611 | * @branch: place to store the chain in. | 611 | * @branch: place to store the chain in. |
612 | * | 612 | * |
613 | * This function allocates blocks, zeroes out all but the last one, | 613 | * This function allocates blocks, zeroes out all but the last one, |
614 | * links them into chain and (if we are synchronous) writes them to disk. | 614 | * links them into chain and (if we are synchronous) writes them to disk. |
615 | * In other words, it prepares a branch that can be spliced onto the | 615 | * In other words, it prepares a branch that can be spliced onto the |
616 | * inode. It stores the information about that chain in the branch[], in | 616 | * inode. It stores the information about that chain in the branch[], in |
617 | * the same format as ext3_get_branch() would do. We are calling it after | 617 | * the same format as ext3_get_branch() would do. We are calling it after |
618 | * we had read the existing part of chain and partial points to the last | 618 | * we had read the existing part of chain and partial points to the last |
619 | * triple of that (one with zero ->key). Upon the exit we have the same | 619 | * triple of that (one with zero ->key). Upon the exit we have the same |
620 | * picture as after the successful ext3_get_block(), except that in one | 620 | * picture as after the successful ext3_get_block(), except that in one |
621 | * place chain is disconnected - *branch->p is still zero (we did not | 621 | * place chain is disconnected - *branch->p is still zero (we did not |
622 | * set the last link), but branch->key contains the number that should | 622 | * set the last link), but branch->key contains the number that should |
623 | * be placed into *branch->p to fill that gap. | 623 | * be placed into *branch->p to fill that gap. |
624 | * | 624 | * |
625 | * If allocation fails we free all blocks we've allocated (and forget | 625 | * If allocation fails we free all blocks we've allocated (and forget |
626 | * their buffer_heads) and return the error value the from failed | 626 | * their buffer_heads) and return the error value the from failed |
627 | * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain | 627 | * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain |
628 | * as described above and return 0. | 628 | * as described above and return 0. |
629 | */ | 629 | */ |
630 | static int ext3_alloc_branch(handle_t *handle, struct inode *inode, | 630 | static int ext3_alloc_branch(handle_t *handle, struct inode *inode, |
631 | int indirect_blks, int *blks, ext3_fsblk_t goal, | 631 | int indirect_blks, int *blks, ext3_fsblk_t goal, |
632 | int *offsets, Indirect *branch) | 632 | int *offsets, Indirect *branch) |
633 | { | 633 | { |
634 | int blocksize = inode->i_sb->s_blocksize; | 634 | int blocksize = inode->i_sb->s_blocksize; |
635 | int i, n = 0; | 635 | int i, n = 0; |
636 | int err = 0; | 636 | int err = 0; |
637 | struct buffer_head *bh; | 637 | struct buffer_head *bh; |
638 | int num; | 638 | int num; |
639 | ext3_fsblk_t new_blocks[4]; | 639 | ext3_fsblk_t new_blocks[4]; |
640 | ext3_fsblk_t current_block; | 640 | ext3_fsblk_t current_block; |
641 | 641 | ||
642 | num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, | 642 | num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, |
643 | *blks, new_blocks, &err); | 643 | *blks, new_blocks, &err); |
644 | if (err) | 644 | if (err) |
645 | return err; | 645 | return err; |
646 | 646 | ||
647 | branch[0].key = cpu_to_le32(new_blocks[0]); | 647 | branch[0].key = cpu_to_le32(new_blocks[0]); |
648 | /* | 648 | /* |
649 | * metadata blocks and data blocks are allocated. | 649 | * metadata blocks and data blocks are allocated. |
650 | */ | 650 | */ |
651 | for (n = 1; n <= indirect_blks; n++) { | 651 | for (n = 1; n <= indirect_blks; n++) { |
652 | /* | 652 | /* |
653 | * Get buffer_head for parent block, zero it out | 653 | * Get buffer_head for parent block, zero it out |
654 | * and set the pointer to new one, then send | 654 | * and set the pointer to new one, then send |
655 | * parent to disk. | 655 | * parent to disk. |
656 | */ | 656 | */ |
657 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 657 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
658 | branch[n].bh = bh; | 658 | branch[n].bh = bh; |
659 | lock_buffer(bh); | 659 | lock_buffer(bh); |
660 | BUFFER_TRACE(bh, "call get_create_access"); | 660 | BUFFER_TRACE(bh, "call get_create_access"); |
661 | err = ext3_journal_get_create_access(handle, bh); | 661 | err = ext3_journal_get_create_access(handle, bh); |
662 | if (err) { | 662 | if (err) { |
663 | unlock_buffer(bh); | 663 | unlock_buffer(bh); |
664 | brelse(bh); | 664 | brelse(bh); |
665 | goto failed; | 665 | goto failed; |
666 | } | 666 | } |
667 | 667 | ||
668 | memset(bh->b_data, 0, blocksize); | 668 | memset(bh->b_data, 0, blocksize); |
669 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 669 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
670 | branch[n].key = cpu_to_le32(new_blocks[n]); | 670 | branch[n].key = cpu_to_le32(new_blocks[n]); |
671 | *branch[n].p = branch[n].key; | 671 | *branch[n].p = branch[n].key; |
672 | if ( n == indirect_blks) { | 672 | if ( n == indirect_blks) { |
673 | current_block = new_blocks[n]; | 673 | current_block = new_blocks[n]; |
674 | /* | 674 | /* |
675 | * End of chain, update the last new metablock of | 675 | * End of chain, update the last new metablock of |
676 | * the chain to point to the new allocated | 676 | * the chain to point to the new allocated |
677 | * data blocks numbers | 677 | * data blocks numbers |
678 | */ | 678 | */ |
679 | for (i=1; i < num; i++) | 679 | for (i=1; i < num; i++) |
680 | *(branch[n].p + i) = cpu_to_le32(++current_block); | 680 | *(branch[n].p + i) = cpu_to_le32(++current_block); |
681 | } | 681 | } |
682 | BUFFER_TRACE(bh, "marking uptodate"); | 682 | BUFFER_TRACE(bh, "marking uptodate"); |
683 | set_buffer_uptodate(bh); | 683 | set_buffer_uptodate(bh); |
684 | unlock_buffer(bh); | 684 | unlock_buffer(bh); |
685 | 685 | ||
686 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); | 686 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); |
687 | err = ext3_journal_dirty_metadata(handle, bh); | 687 | err = ext3_journal_dirty_metadata(handle, bh); |
688 | if (err) | 688 | if (err) |
689 | goto failed; | 689 | goto failed; |
690 | } | 690 | } |
691 | *blks = num; | 691 | *blks = num; |
692 | return err; | 692 | return err; |
693 | failed: | 693 | failed: |
694 | /* Allocation failed, free what we already allocated */ | 694 | /* Allocation failed, free what we already allocated */ |
695 | for (i = 1; i <= n ; i++) { | 695 | for (i = 1; i <= n ; i++) { |
696 | BUFFER_TRACE(branch[i].bh, "call journal_forget"); | 696 | BUFFER_TRACE(branch[i].bh, "call journal_forget"); |
697 | ext3_journal_forget(handle, branch[i].bh); | 697 | ext3_journal_forget(handle, branch[i].bh); |
698 | } | 698 | } |
699 | for (i = 0; i <indirect_blks; i++) | 699 | for (i = 0; i <indirect_blks; i++) |
700 | ext3_free_blocks(handle, inode, new_blocks[i], 1); | 700 | ext3_free_blocks(handle, inode, new_blocks[i], 1); |
701 | 701 | ||
702 | ext3_free_blocks(handle, inode, new_blocks[i], num); | 702 | ext3_free_blocks(handle, inode, new_blocks[i], num); |
703 | 703 | ||
704 | return err; | 704 | return err; |
705 | } | 705 | } |
706 | 706 | ||
/**
 * ext3_splice_branch - splice the allocated branch onto inode.
 * @handle: handle for this transaction
 * @inode: owner
 * @block: (logical) number of block we are adding
 * @where: location of missing link
 * @num: number of indirect blocks we are adding
 * @blks: number of direct blocks we are adding
 *
 * This function fills the missing link and does all housekeeping needed in
 * inode (->i_blocks, etc.). In case of success we end up with the full
 * chain to new block and return 0.
 */
static int ext3_splice_branch(handle_t *handle, struct inode *inode,
			long block, Indirect *where, int num, int blks)
{
	int i;
	int err = 0;
	struct ext3_block_alloc_info *block_i;
	ext3_fsblk_t current_block;
	struct ext3_inode_info *ei = EXT3_I(inode);

	block_i = ei->i_block_alloc_info;
	/*
	 * If we're splicing into a [td]indirect block (as opposed to the
	 * inode) then we need to get write access to the [td]indirect block
	 * before the splice.
	 */
	if (where->bh) {
		BUFFER_TRACE(where->bh, "get_write_access");
		err = ext3_journal_get_write_access(handle, where->bh);
		if (err)
			goto err_out;
	}
	/* That's it */

	/* This single store makes the whole new branch reachable. */
	*where->p = where->key;

	/*
	 * Update the host buffer_head or inode to point to more just allocated
	 * direct blocks blocks
	 */
	if (num == 0 && blks > 1) {
		current_block = le32_to_cpu(where->key) + 1;
		for (i = 1; i < blks; i++)
			*(where->p + i ) = cpu_to_le32(current_block++);
	}

	/*
	 * update the most recently allocated logical & physical block
	 * in i_block_alloc_info, to assist find the proper goal block for next
	 * allocation
	 */
	if (block_i) {
		block_i->last_alloc_logical_block = block + blks - 1;
		block_i->last_alloc_physical_block =
				le32_to_cpu(where[num].key) + blks - 1;
	}

	/* We are done with atomic stuff, now do the rest of housekeeping */

	inode->i_ctime = CURRENT_TIME_SEC;
	ext3_mark_inode_dirty(handle, inode);
	/* ext3_mark_inode_dirty already updated i_sync_tid */
	atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);

	/* had we spliced it onto indirect block? */
	if (where->bh) {
		/*
		 * If we spliced it onto an indirect block, we haven't
		 * altered the inode. Note however that if it is being spliced
		 * onto an indirect block at the very end of the file (the
		 * file is growing) then we *will* alter the inode to reflect
		 * the new i_size. But that is not done here - it is done in
		 * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode.
		 */
		jbd_debug(5, "splicing indirect only\n");
		BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
		err = ext3_journal_dirty_metadata(handle, where->bh);
		if (err)
			goto err_out;
	} else {
		/*
		 * OK, we spliced it into the inode itself on a direct block.
		 * Inode was dirtied above.
		 */
		jbd_debug(5, "splicing direct\n");
	}
	return err;

err_out:
	/* Undo: forget the new indirect buffers and release all new blocks. */
	for (i = 1; i <= num; i++) {
		BUFFER_TRACE(where[i].bh, "call journal_forget");
		ext3_journal_forget(handle, where[i].bh);
		ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
	}
	ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);

	return err;
}
807 | 807 | ||
808 | /* | 808 | /* |
809 | * Allocation strategy is simple: if we have to allocate something, we will | 809 | * Allocation strategy is simple: if we have to allocate something, we will |
810 | * have to go the whole way to leaf. So let's do it before attaching anything | 810 | * have to go the whole way to leaf. So let's do it before attaching anything |
811 | * to tree, set linkage between the newborn blocks, write them if sync is | 811 | * to tree, set linkage between the newborn blocks, write them if sync is |
812 | * required, recheck the path, free and repeat if check fails, otherwise | 812 | * required, recheck the path, free and repeat if check fails, otherwise |
813 | * set the last missing link (that will protect us from any truncate-generated | 813 | * set the last missing link (that will protect us from any truncate-generated |
814 | * removals - all blocks on the path are immune now) and possibly force the | 814 | * removals - all blocks on the path are immune now) and possibly force the |
815 | * write on the parent block. | 815 | * write on the parent block. |
816 | * That has a nice additional property: no special recovery from the failed | 816 | * That has a nice additional property: no special recovery from the failed |
817 | * allocations is needed - we simply release blocks and do not touch anything | 817 | * allocations is needed - we simply release blocks and do not touch anything |
818 | * reachable from inode. | 818 | * reachable from inode. |
819 | * | 819 | * |
820 | * `handle' can be NULL if create == 0. | 820 | * `handle' can be NULL if create == 0. |
821 | * | 821 | * |
822 | * The BKL may not be held on entry here. Be sure to take it early. | 822 | * The BKL may not be held on entry here. Be sure to take it early. |
823 | * return > 0, # of blocks mapped or allocated. | 823 | * return > 0, # of blocks mapped or allocated. |
824 | * return = 0, if plain lookup failed. | 824 | * return = 0, if plain lookup failed. |
825 | * return < 0, error case. | 825 | * return < 0, error case. |
826 | */ | 826 | */ |
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
		sector_t iblock, unsigned long maxblocks,
		struct buffer_head *bh_result,
		int create)
{
	int err = -EIO;
	int offsets[4];		/* path of offsets from inode to iblock */
	Indirect chain[4];	/* block pointers along that path */
	Indirect *partial;	/* first missing link in the chain, if any */
	ext3_fsblk_t goal;
	int indirect_blks;
	int blocks_to_boundary = 0;
	int depth;
	struct ext3_inode_info *ei = EXT3_I(inode);
	int count = 0;		/* number of contiguous blocks mapped */
	ext3_fsblk_t first_block = 0;


	J_ASSERT(handle != NULL || create == 0);
	depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);

	if (depth == 0)
		goto out;

	partial = ext3_get_branch(inode, depth, offsets, chain, &err);

	/* Simplest case - block found, no allocation needed */
	if (!partial) {
		first_block = le32_to_cpu(chain[depth - 1].key);
		clear_buffer_new(bh_result);
		count++;
		/*map more blocks*/
		while (count < maxblocks && count <= blocks_to_boundary) {
			ext3_fsblk_t blk;

			if (!verify_chain(chain, chain + depth - 1)) {
				/*
				 * Indirect block might be removed by
				 * truncate while we were reading it.
				 * Handling of that case: forget what we've
				 * got now. Flag the err as EAGAIN, so it
				 * will reread.
				 */
				err = -EAGAIN;
				count = 0;
				break;
			}
			blk = le32_to_cpu(*(chain[depth-1].p + count));

			/* only physically contiguous blocks are merged */
			if (blk == first_block + count)
				count++;
			else
				break;
		}
		if (err != -EAGAIN)
			goto got_it;
	}

	/* Next simple case - plain lookup or failed read of indirect block */
	if (!create || err == -EIO)
		goto cleanup;

	/* serialize against truncate and other allocating get_block calls */
	mutex_lock(&ei->truncate_mutex);

	/*
	 * If the indirect block is missing while we are reading
	 * the chain(ext3_get_branch() returns -EAGAIN err), or
	 * if the chain has been changed after we grab the semaphore,
	 * (either because another process truncated this branch, or
	 * another get_block allocated this branch) re-grab the chain to see if
	 * the request block has been allocated or not.
	 *
	 * Since we already block the truncate/other get_block
	 * at this point, we will have the current copy of the chain when we
	 * splice the branch into the tree.
	 */
	if (err == -EAGAIN || !verify_chain(chain, partial)) {
		while (partial > chain) {
			brelse(partial->bh);
			partial--;
		}
		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
		if (!partial) {
			count++;
			mutex_unlock(&ei->truncate_mutex);
			if (err)
				goto cleanup;
			clear_buffer_new(bh_result);
			goto got_it;
		}
	}

	/*
	 * Okay, we need to do block allocation.  Lazily initialize the block
	 * allocation info here if necessary
	 */
	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
		ext3_init_block_alloc_info(inode);

	goal = ext3_find_goal(inode, iblock, partial);

	/* the number of blocks need to allocate for [d,t]indirect blocks */
	indirect_blks = (chain + depth) - partial - 1;

	/*
	 * Next look up the indirect map to count the total number of
	 * direct blocks to allocate for this branch.
	 */
	count = ext3_blks_to_allocate(partial, indirect_blks,
					maxblocks, blocks_to_boundary);
	/*
	 * Block out ext3_truncate while we alter the tree
	 */
	err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
				offsets + (partial - chain), partial);

	/*
	 * The ext3_splice_branch call will free and forget any buffers
	 * on the new chain if there is a failure, but that risks using
	 * up transaction credits, especially for bitmaps where the
	 * credits cannot be returned.  Can we handle this somehow?  We
	 * may need to return -EAGAIN upwards in the worst case.  --sct
	 */
	if (!err)
		err = ext3_splice_branch(handle, inode, iblock,
					partial, indirect_blks, count);
	mutex_unlock(&ei->truncate_mutex);
	if (err)
		goto cleanup;

	set_buffer_new(bh_result);
got_it:
	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
	if (count > blocks_to_boundary)
		set_buffer_boundary(bh_result);
	err = count;
	/* Clean up and exit */
	partial = chain + depth - 1;	/* the whole chain */
cleanup:
	while (partial > chain) {
		BUFFER_TRACE(partial->bh, "call brelse");
		brelse(partial->bh);
		partial--;
	}
	BUFFER_TRACE(bh_result, "returned");
out:
	return err;
}
975 | 975 | ||
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
/*
 * Number of credits we need for writing DIO_MAX_BLOCKS:
 * We need sb + group descriptor + bitmap + inode -> 4
 * For B blocks with A block pointers per block we need:
 * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
 * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
 *
 * Note: quota credits are not included here; ext3_get_block() adds
 * EXT3_MAXQUOTAS_TRANS_BLOCKS() on top when it starts the handle.
 */
#define DIO_CREDITS 25
986 | 986 | ||
/*
 * get_block_t callback used by the buffered and direct I/O paths.
 *
 * If @create is set and no journal handle is running (i.e. a direct I/O
 * write), a handle is started here with DIO_CREDITS plus quota credits
 * and stopped again once the mapping is done.  On success 0 is returned
 * and bh_result->b_size holds the number of bytes mapped; a negative
 * value is an error.
 */
static int ext3_get_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	handle_t *handle = ext3_journal_current_handle();
	int ret = 0, started = 0;
	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;

	if (create && !handle) {	/* Direct IO write... */
		if (max_blocks > DIO_MAX_BLOCKS)
			max_blocks = DIO_MAX_BLOCKS;
		handle = ext3_journal_start(inode, DIO_CREDITS +
				EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}
		started = 1;
	}

	ret = ext3_get_blocks_handle(handle, inode, iblock,
					max_blocks, bh_result, create);
	if (ret > 0) {
		/* turn the "blocks mapped" count into a byte count */
		bh_result->b_size = (ret << inode->i_blkbits);
		ret = 0;
	}
	/* only stop the handle we started ourselves */
	if (started)
		ext3_journal_stop(handle);
out:
	return ret;
}
1017 | 1017 | ||
/*
 * ->fiemap method: report the extent mappings for the byte range
 * [@start, @start + @len) by walking logical blocks through
 * ext3_get_block().
 */
int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	return generic_block_fiemap(inode, fieinfo, start, len,
				    ext3_get_block);
}
1024 | 1024 | ||
1025 | /* | 1025 | /* |
1026 | * `handle' can be NULL if create is zero | 1026 | * `handle' can be NULL if create is zero |
1027 | */ | 1027 | */ |
/*
 * Look up (and, if @create != 0, allocate) logical @block of @inode and
 * return a buffer_head for it.  Returns NULL with *errp set on error,
 * and NULL with *errp == 0 for a hole.
 *
 * A dummy on-stack buffer_head receives the mapping first so that the
 * new/mapped state can be inspected before the real bh is grabbed.
 */
struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
				long block, int create, int *errp)
{
	struct buffer_head dummy;
	int fatal = 0, err;

	J_ASSERT(handle != NULL || create == 0);

	dummy.b_state = 0;
	dummy.b_blocknr = -1000;
	buffer_trace_init(&dummy.b_history);
	err = ext3_get_blocks_handle(handle, inode, block, 1,
					&dummy, create);
	/*
	 * ext3_get_blocks_handle() returns number of blocks
	 * mapped. 0 in case of a HOLE.
	 */
	if (err > 0) {
		/* we asked for one block, so more than one is a bug */
		if (err > 1)
			WARN_ON(1);
		err = 0;
	}
	*errp = err;
	if (!err && buffer_mapped(&dummy)) {
		struct buffer_head *bh;
		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
		if (!bh) {
			*errp = -EIO;
			goto err;
		}
		if (buffer_new(&dummy)) {
			J_ASSERT(create != 0);
			J_ASSERT(handle != NULL);

			/*
			 * Now that we do not always journal data, we should
			 * keep in mind whether this should always journal the
			 * new buffer as metadata.  For now, regular file
			 * writes use ext3_get_block instead, so it's not a
			 * problem.
			 */
			lock_buffer(bh);
			BUFFER_TRACE(bh, "call get_create_access");
			fatal = ext3_journal_get_create_access(handle, bh);
			if (!fatal && !buffer_uptodate(bh)) {
				/* freshly allocated block: zero-fill it */
				memset(bh->b_data,0,inode->i_sb->s_blocksize);
				set_buffer_uptodate(bh);
			}
			unlock_buffer(bh);
			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
			err = ext3_journal_dirty_metadata(handle, bh);
			if (!fatal)
				fatal = err;
		} else {
			BUFFER_TRACE(bh, "not a new buffer");
		}
		if (fatal) {
			*errp = fatal;
			brelse(bh);
			bh = NULL;
		}
		return bh;
	}
err:
	return NULL;
}
1094 | 1094 | ||
1095 | struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, | 1095 | struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, |
1096 | int block, int create, int *err) | 1096 | int block, int create, int *err) |
1097 | { | 1097 | { |
1098 | struct buffer_head * bh; | 1098 | struct buffer_head * bh; |
1099 | 1099 | ||
1100 | bh = ext3_getblk(handle, inode, block, create, err); | 1100 | bh = ext3_getblk(handle, inode, block, create, err); |
1101 | if (!bh) | 1101 | if (!bh) |
1102 | return bh; | 1102 | return bh; |
1103 | if (buffer_uptodate(bh)) | 1103 | if (buffer_uptodate(bh)) |
1104 | return bh; | 1104 | return bh; |
1105 | ll_rw_block(READ_META, 1, &bh); | 1105 | ll_rw_block(READ_META, 1, &bh); |
1106 | wait_on_buffer(bh); | 1106 | wait_on_buffer(bh); |
1107 | if (buffer_uptodate(bh)) | 1107 | if (buffer_uptodate(bh)) |
1108 | return bh; | 1108 | return bh; |
1109 | put_bh(bh); | 1109 | put_bh(bh); |
1110 | *err = -EIO; | 1110 | *err = -EIO; |
1111 | return NULL; | 1111 | return NULL; |
1112 | } | 1112 | } |
1113 | 1113 | ||
/*
 * Apply @fn to every buffer of the page headed by @head that overlaps
 * the byte range [@from, @to) within the page.  Iteration stops after
 * the first error from @fn, which is returned.
 *
 * If @partial is non-NULL, *partial is set to 1 when a buffer OUTSIDE
 * the range is not uptodate - i.e. the page as a whole cannot be marked
 * uptodate by the caller.
 */
static int walk_page_buffers(	handle_t *handle,
				struct buffer_head *head,
				unsigned from,
				unsigned to,
				int *partial,
				int (*fn)(	handle_t *handle,
						struct buffer_head *bh))
{
	struct buffer_head *bh;
	unsigned block_start, block_end;
	unsigned blocksize = head->b_size;
	int err, ret = 0;
	struct buffer_head *next;

	/*
	 * The page's buffers form a circular list; "bh != head ||
	 * !block_start" distinguishes the first visit to head from
	 * wrapping back around to it.
	 */
	for (	bh = head, block_start = 0;
		ret == 0 && (bh != head || !block_start);
		block_start = block_end, bh = next)
	{
		next = bh->b_this_page;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			/* buffer entirely outside the range: skip it */
			if (partial && !buffer_uptodate(bh))
				*partial = 1;
			continue;
		}
		err = (*fn)(handle, bh);
		if (!ret)
			ret = err;
	}
	return ret;
}
1145 | 1145 | ||
1146 | /* | 1146 | /* |
1147 | * To preserve ordering, it is essential that the hole instantiation and | 1147 | * To preserve ordering, it is essential that the hole instantiation and |
1148 | * the data write be encapsulated in a single transaction. We cannot | 1148 | * the data write be encapsulated in a single transaction. We cannot |
1149 | * close off a transaction and start a new one between the ext3_get_block() | 1149 | * close off a transaction and start a new one between the ext3_get_block() |
1150 | * and the commit_write(). So doing the journal_start at the start of | 1150 | * and the commit_write(). So doing the journal_start at the start of |
1151 | * prepare_write() is the right place. | 1151 | * prepare_write() is the right place. |
1152 | * | 1152 | * |
1153 | * Also, this function can nest inside ext3_writepage() -> | 1153 | * Also, this function can nest inside ext3_writepage() -> |
1154 | * block_write_full_page(). In that case, we *know* that ext3_writepage() | 1154 | * block_write_full_page(). In that case, we *know* that ext3_writepage() |
1155 | * has generated enough buffer credits to do the whole page. So we won't | 1155 | * has generated enough buffer credits to do the whole page. So we won't |
1156 | * block on the journal in that case, which is good, because the caller may | 1156 | * block on the journal in that case, which is good, because the caller may |
1157 | * be PF_MEMALLOC. | 1157 | * be PF_MEMALLOC. |
1158 | * | 1158 | * |
1159 | * By accident, ext3 can be reentered when a transaction is open via | 1159 | * By accident, ext3 can be reentered when a transaction is open via |
1160 | * quota file writes. If we were to commit the transaction while thus | 1160 | * quota file writes. If we were to commit the transaction while thus |
1161 | * reentered, there can be a deadlock - we would be holding a quota | 1161 | * reentered, there can be a deadlock - we would be holding a quota |
1162 | * lock, and the commit would never complete if another thread had a | 1162 | * lock, and the commit would never complete if another thread had a |
1163 | * transaction open and was blocking on the quota lock - a ranking | 1163 | * transaction open and was blocking on the quota lock - a ranking |
1164 | * violation. | 1164 | * violation. |
1165 | * | 1165 | * |
1166 | * So what we do is to rely on the fact that journal_stop/journal_start | 1166 | * So what we do is to rely on the fact that journal_stop/journal_start |
1167 | * will _not_ run commit under these circumstances because handle->h_ref | 1167 | * will _not_ run commit under these circumstances because handle->h_ref |
1168 | * is elevated. We'll still have enough credits for the tiny quotafile | 1168 | * is elevated. We'll still have enough credits for the tiny quotafile |
1169 | * write. | 1169 | * write. |
1170 | */ | 1170 | */ |
/*
 * Called through walk_page_buffers() in data=journal mode: get journal
 * write access on one page buffer before it is modified.
 */
static int do_journal_get_write_access(handle_t *handle,
					struct buffer_head *bh)
{
	int dirty = buffer_dirty(bh);
	int ret;

	if (!buffer_mapped(bh) || buffer_freed(bh))
		return 0;
	/*
	 * __block_write_begin() could have dirtied some buffers. Clean
	 * the dirty bit as ext3_journal_get_write_access() could complain
	 * otherwise about fs integrity issues. Setting of the dirty bit
	 * by __block_write_begin() isn't a real problem here as we clear
	 * the bit before releasing a page lock and thus writeback cannot
	 * ever write the buffer.
	 */
	if (dirty)
		clear_buffer_dirty(bh);
	ret = ext3_journal_get_write_access(handle, bh);
	/* re-dirty through the journal so the change is not lost */
	if (!ret && dirty)
		ret = ext3_journal_dirty_metadata(handle, bh);
	return ret;
}
1194 | 1194 | ||
1195 | /* | 1195 | /* |
1196 | * Truncate blocks that were not used by write. We have to truncate the | 1196 | * Truncate blocks that were not used by write. We have to truncate the |
1197 | * pagecache as well so that corresponding buffers get properly unmapped. | 1197 | * pagecache as well so that corresponding buffers get properly unmapped. |
1198 | */ | 1198 | */ |
static void ext3_truncate_failed_write(struct inode *inode)
{
	/*
	 * Drop the pagecache beyond i_size first so the buffers covering
	 * the soon-to-be-freed blocks get unmapped, then release the
	 * blocks themselves.
	 */
	truncate_inode_pages(inode->i_mapping, inode->i_size);
	ext3_truncate(inode);
}
1204 | 1204 | ||
/*
 * ->write_begin: prepare for a write of @len bytes at @pos.
 *
 * Grabs and locks the page, starts a journal handle sized for the
 * worst-case block allocation (plus one extra block for a possible
 * orphan-list insertion), and maps/allocates the blocks backing the
 * range.  In data=journal mode the affected buffers additionally get
 * journal write access.  On failure, blocks instantiated beyond i_size
 * are truncated away again; -ENOSPC is retried after asking the
 * allocator whether a retry makes sense.
 */
static int ext3_write_begin(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	int ret;
	handle_t *handle;
	int retries = 0;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	/* Reserve one block more for addition to orphan list in case
	 * we allocate blocks but write fails for some reason */
	int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

retry:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	handle = ext3_journal_start(inode, needed_blocks);
	if (IS_ERR(handle)) {
		unlock_page(page);
		page_cache_release(page);
		ret = PTR_ERR(handle);
		goto out;
	}
	ret = __block_write_begin(page, pos, len, ext3_get_block);
	if (ret)
		goto write_begin_failed;

	if (ext3_should_journal_data(inode)) {
		ret = walk_page_buffers(handle, page_buffers(page),
				from, to, NULL, do_journal_get_write_access);
	}
write_begin_failed:
	if (ret) {
		/*
		 * block_write_begin may have instantiated a few blocks
		 * outside i_size.  Trim these off again. Don't need
		 * i_size_read because we hold i_mutex.
		 *
		 * Add inode to orphan list in case we crash before truncate
		 * finishes. Do this only if ext3_can_truncate() agrees so
		 * that orphan processing code is happy.
		 */
		if (pos + len > inode->i_size && ext3_can_truncate(inode))
			ext3_orphan_add(handle, inode);
		ext3_journal_stop(handle);
		unlock_page(page);
		page_cache_release(page);
		if (pos + len > inode->i_size)
			ext3_truncate_failed_write(inode);
	}
	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
out:
	return ret;
}
1269 | 1269 | ||
1270 | 1270 | ||
1271 | int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | 1271 | int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) |
1272 | { | 1272 | { |
1273 | int err = journal_dirty_data(handle, bh); | 1273 | int err = journal_dirty_data(handle, bh); |
1274 | if (err) | 1274 | if (err) |
1275 | ext3_journal_abort_handle(__func__, __func__, | 1275 | ext3_journal_abort_handle(__func__, __func__, |
1276 | bh, handle, err); | 1276 | bh, handle, err); |
1277 | return err; | 1277 | return err; |
1278 | } | 1278 | } |
1279 | 1279 | ||
1280 | /* For ordered writepage and write_end functions */ | 1280 | /* For ordered writepage and write_end functions */ |
1281 | static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | 1281 | static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) |
1282 | { | 1282 | { |
1283 | /* | 1283 | /* |
1284 | * Write could have mapped the buffer but it didn't copy the data in | 1284 | * Write could have mapped the buffer but it didn't copy the data in |
1285 | * yet. So avoid filing such buffer into a transaction. | 1285 | * yet. So avoid filing such buffer into a transaction. |
1286 | */ | 1286 | */ |
1287 | if (buffer_mapped(bh) && buffer_uptodate(bh)) | 1287 | if (buffer_mapped(bh) && buffer_uptodate(bh)) |
1288 | return ext3_journal_dirty_data(handle, bh); | 1288 | return ext3_journal_dirty_data(handle, bh); |
1289 | return 0; | 1289 | return 0; |
1290 | } | 1290 | } |
1291 | 1291 | ||
1292 | /* For write_end() in data=journal mode */ | 1292 | /* For write_end() in data=journal mode */ |
1293 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) | 1293 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) |
1294 | { | 1294 | { |
1295 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1295 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1296 | return 0; | 1296 | return 0; |
1297 | set_buffer_uptodate(bh); | 1297 | set_buffer_uptodate(bh); |
1298 | return ext3_journal_dirty_metadata(handle, bh); | 1298 | return ext3_journal_dirty_metadata(handle, bh); |
1299 | } | 1299 | } |
1300 | 1300 | ||
1301 | /* | 1301 | /* |
1302 | * This is nasty and subtle: ext3_write_begin() could have allocated blocks | 1302 | * This is nasty and subtle: ext3_write_begin() could have allocated blocks |
1303 | * for the whole page but later we failed to copy the data in. Update inode | 1303 | * for the whole page but later we failed to copy the data in. Update inode |
1304 | * size according to what we managed to copy. The rest is going to be | 1304 | * size according to what we managed to copy. The rest is going to be |
1305 | * truncated in write_end function. | 1305 | * truncated in write_end function. |
1306 | */ | 1306 | */ |
1307 | static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) | 1307 | static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) |
1308 | { | 1308 | { |
1309 | /* What matters to us is i_disksize. We don't write i_size anywhere */ | 1309 | /* What matters to us is i_disksize. We don't write i_size anywhere */ |
1310 | if (pos + copied > inode->i_size) | 1310 | if (pos + copied > inode->i_size) |
1311 | i_size_write(inode, pos + copied); | 1311 | i_size_write(inode, pos + copied); |
1312 | if (pos + copied > EXT3_I(inode)->i_disksize) { | 1312 | if (pos + copied > EXT3_I(inode)->i_disksize) { |
1313 | EXT3_I(inode)->i_disksize = pos + copied; | 1313 | EXT3_I(inode)->i_disksize = pos + copied; |
1314 | mark_inode_dirty(inode); | 1314 | mark_inode_dirty(inode); |
1315 | } | 1315 | } |
1316 | } | 1316 | } |
1317 | 1317 | ||
/*
 * We need to pick up the new inode size which generic_commit_write gave us
 * `file' can be NULL - eg, when called from page_symlink().
 *
 * ext3 never places buffers on inode->i_mapping->private_list.  metadata
 * buffers are managed internally.
 */
static int ext3_ordered_write_end(struct file *file,
				struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = file->f_mapping->host;
	unsigned from, to;
	int ret = 0, ret2;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * File only the range that actually received data onto the
	 * transaction's ordered-data list.
	 */
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + copied;
	ret = walk_page_buffers(handle, page_buffers(page),
				from, to, NULL, journal_dirty_data_fn);

	if (ret == 0)
		update_file_sizes(inode, pos, copied);
	/*
	 * There may be allocated blocks outside of i_size because
	 * we failed to copy some data. Prepare for truncate.
	 */
	/* Orphan-add must happen while the handle is still open. */
	if (pos + len > inode->i_size && ext3_can_truncate(inode))
		ext3_orphan_add(handle, inode);
	ret2 = ext3_journal_stop(handle);
	if (!ret)
		ret = ret2;
	unlock_page(page);
	page_cache_release(page);

	/*
	 * The truncate of the over-allocated tail runs only after the page
	 * is unlocked and released; it may start its own transaction.
	 */
	if (pos + len > inode->i_size)
		ext3_truncate_failed_write(inode);
	return ret ? ret : copied;
}
1360 | 1360 | ||
/*
 * write_end for data=writeback mode: no per-buffer journaling of data,
 * just update the in-core/on-disk sizes and close the handle.
 */
static int ext3_writeback_write_end(struct file *file,
				struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = file->f_mapping->host;
	int ret;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
	update_file_sizes(inode, pos, copied);
	/*
	 * There may be allocated blocks outside of i_size because
	 * we failed to copy some data. Prepare for truncate.
	 */
	/* Orphan-add must happen while the handle is still open. */
	if (pos + len > inode->i_size && ext3_can_truncate(inode))
		ext3_orphan_add(handle, inode);
	ret = ext3_journal_stop(handle);
	unlock_page(page);
	page_cache_release(page);

	/* Truncate of the over-allocated tail runs outside the handle. */
	if (pos + len > inode->i_size)
		ext3_truncate_failed_write(inode);
	return ret ? ret : copied;
}
1386 | 1386 | ||
/*
 * write_end for data=journal mode: the copied data itself is filed as
 * journal metadata (via write_end_fn), and EXT3_STATE_JDATA is set so a
 * later bmap() knows to flush the journal first.
 */
static int ext3_journalled_write_end(struct file *file,
				struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = mapping->host;
	int ret = 0, ret2;
	int partial = 0;
	unsigned from, to;

	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	if (copied < len) {
		/*
		 * Short copy.  If the page wasn't uptodate we can't trust
		 * any partially-copied data, so treat it as if nothing was
		 * copied; zero the new buffers in the uncopied range.
		 */
		if (!PageUptodate(page))
			copied = 0;
		page_zero_new_buffers(page, from + copied, to);
		to = from + copied;
	}

	ret = walk_page_buffers(handle, page_buffers(page), from,
				to, &partial, write_end_fn);
	/* The page is fully uptodate only if every buffer was journalled. */
	if (!partial)
		SetPageUptodate(page);

	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);
	/*
	 * There may be allocated blocks outside of i_size because
	 * we failed to copy some data. Prepare for truncate.
	 */
	if (pos + len > inode->i_size && ext3_can_truncate(inode))
		ext3_orphan_add(handle, inode);
	ext3_set_inode_state(inode, EXT3_STATE_JDATA);
	if (inode->i_size > EXT3_I(inode)->i_disksize) {
		EXT3_I(inode)->i_disksize = inode->i_size;
		ret2 = ext3_mark_inode_dirty(handle, inode);
		if (!ret)
			ret = ret2;
	}

	ret2 = ext3_journal_stop(handle);
	if (!ret)
		ret = ret2;
	unlock_page(page);
	page_cache_release(page);

	/* Truncate over-allocated tail after the page is released. */
	if (pos + len > inode->i_size)
		ext3_truncate_failed_write(inode);
	return ret ? ret : copied;
}
1439 | 1439 | ||
1440 | /* | 1440 | /* |
1441 | * bmap() is special. It gets used by applications such as lilo and by | 1441 | * bmap() is special. It gets used by applications such as lilo and by |
1442 | * the swapper to find the on-disk block of a specific piece of data. | 1442 | * the swapper to find the on-disk block of a specific piece of data. |
1443 | * | 1443 | * |
1444 | * Naturally, this is dangerous if the block concerned is still in the | 1444 | * Naturally, this is dangerous if the block concerned is still in the |
1445 | * journal. If somebody makes a swapfile on an ext3 data-journaling | 1445 | * journal. If somebody makes a swapfile on an ext3 data-journaling |
1446 | * filesystem and enables swap, then they may get a nasty shock when the | 1446 | * filesystem and enables swap, then they may get a nasty shock when the |
1447 | * data getting swapped to that swapfile suddenly gets overwritten by | 1447 | * data getting swapped to that swapfile suddenly gets overwritten by |
1448 | * the original zero's written out previously to the journal and | 1448 | * the original zero's written out previously to the journal and |
1449 | * awaiting writeback in the kernel's buffer cache. | 1449 | * awaiting writeback in the kernel's buffer cache. |
1450 | * | 1450 | * |
1451 | * So, if we see any bmap calls here on a modified, data-journaled file, | 1451 | * So, if we see any bmap calls here on a modified, data-journaled file, |
1452 | * take extra steps to flush any blocks which might be in the cache. | 1452 | * take extra steps to flush any blocks which might be in the cache. |
1453 | */ | 1453 | */ |
static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	journal_t *journal;
	int err;

	if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
		/*
		 * This is a REALLY heavyweight approach, but the use of
		 * bmap on dirty files is expected to be extremely rare:
		 * only if we run lilo or swapon on a freshly made file
		 * do we expect this to happen.
		 *
		 * (bmap requires CAP_SYS_RAWIO so this does not
		 * represent an unprivileged user DOS attack --- we'd be
		 * in trouble if mortal users could trigger this path at
		 * will.)
		 *
		 * NB. EXT3_STATE_JDATA is not set on files other than
		 * regular files.  If somebody wants to bmap a directory
		 * or symlink and gets confused because the buffer
		 * hasn't yet been flushed to disk, they deserve
		 * everything they get.
		 */

		/*
		 * Clear the flag first, then force every journalled data
		 * block out via a full journal flush so the on-disk block
		 * numbers we report below are valid.
		 */
		ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
		journal = EXT3_JOURNAL(inode);
		journal_lock_updates(journal);
		err = journal_flush(journal);
		journal_unlock_updates(journal);

		/* A failed flush means we can't trust any mapping: bail. */
		if (err)
			return 0;
	}

	return generic_block_bmap(mapping,block,ext3_get_block);
}
1491 | 1491 | ||
/* walk_page_buffers() callback: take an extra reference on @bh. */
static int bget_one(handle_t *handle, struct buffer_head *bh)
{
	get_bh(bh);
	return 0;
}
1497 | 1497 | ||
/* walk_page_buffers() callback: drop the reference taken by bget_one(). */
static int bput_one(handle_t *handle, struct buffer_head *bh)
{
	put_bh(bh);
	return 0;
}
1503 | 1503 | ||
/* walk_page_buffers() predicate: nonzero when @bh has no disk mapping. */
static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
{
	return !buffer_mapped(bh);
}
1508 | 1508 | ||
1509 | /* | 1509 | /* |
1510 | * Note that we always start a transaction even if we're not journalling | 1510 | * Note that we always start a transaction even if we're not journalling |
1511 | * data. This is to preserve ordering: any hole instantiation within | 1511 | * data. This is to preserve ordering: any hole instantiation within |
1512 | * __block_write_full_page -> ext3_get_block() should be journalled | 1512 | * __block_write_full_page -> ext3_get_block() should be journalled |
1513 | * along with the data so we don't crash and then get metadata which | 1513 | * along with the data so we don't crash and then get metadata which |
1514 | * refers to old data. | 1514 | * refers to old data. |
1515 | * | 1515 | * |
1516 | * In all journalling modes block_write_full_page() will start the I/O. | 1516 | * In all journalling modes block_write_full_page() will start the I/O. |
1517 | * | 1517 | * |
1518 | * Problem: | 1518 | * Problem: |
1519 | * | 1519 | * |
1520 | * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | 1520 | * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> |
1521 | * ext3_writepage() | 1521 | * ext3_writepage() |
1522 | * | 1522 | * |
1523 | * Similar for: | 1523 | * Similar for: |
1524 | * | 1524 | * |
1525 | * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... | 1525 | * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... |
1526 | * | 1526 | * |
1527 | * Same applies to ext3_get_block(). We will deadlock on various things like | 1527 | * Same applies to ext3_get_block(). We will deadlock on various things like |
1528 | * lock_journal and i_truncate_mutex. | 1528 | * lock_journal and i_truncate_mutex. |
1529 | * | 1529 | * |
1530 | * Setting PF_MEMALLOC here doesn't work - too many internal memory | 1530 | * Setting PF_MEMALLOC here doesn't work - too many internal memory |
1531 | * allocations fail. | 1531 | * allocations fail. |
1532 | * | 1532 | * |
1533 | * 16May01: If we're reentered then journal_current_handle() will be | 1533 | * 16May01: If we're reentered then journal_current_handle() will be |
1534 | * non-zero. We simply *return*. | 1534 | * non-zero. We simply *return*. |
1535 | * | 1535 | * |
1536 | * 1 July 2001: @@@ FIXME: | 1536 | * 1 July 2001: @@@ FIXME: |
1537 | * In journalled data mode, a data buffer may be metadata against the | 1537 | * In journalled data mode, a data buffer may be metadata against the |
1538 | * current transaction. But the same file is part of a shared mapping | 1538 | * current transaction. But the same file is part of a shared mapping |
1539 | * and someone does a writepage() on it. | 1539 | * and someone does a writepage() on it. |
1540 | * | 1540 | * |
1541 | * We will move the buffer onto the async_data list, but *after* it has | 1541 | * We will move the buffer onto the async_data list, but *after* it has |
1542 | * been dirtied. So there's a small window where we have dirty data on | 1542 | * been dirtied. So there's a small window where we have dirty data on |
1543 | * BJ_Metadata. | 1543 | * BJ_Metadata. |
1544 | * | 1544 | * |
1545 | * Note that this only applies to the last partial page in the file. The | 1545 | * Note that this only applies to the last partial page in the file. The |
1546 | * bit which block_write_full_page() uses prepare/commit for. (That's | 1546 | * bit which block_write_full_page() uses prepare/commit for. (That's |
1547 | * broken code anyway: it's wrong for msync()). | 1547 | * broken code anyway: it's wrong for msync()). |
1548 | * | 1548 | * |
1549 | * It's a rare case: affects the final partial page, for journalled data | 1549 | * It's a rare case: affects the final partial page, for journalled data |
 * where the file is subject to both write() and writepage() in the same
 * transaction. To fix it we'll need a custom block_write_full_page().
1552 | * We'll probably need that anyway for journalling writepage() output. | 1552 | * We'll probably need that anyway for journalling writepage() output. |
1553 | * | 1553 | * |
1554 | * We don't honour synchronous mounts for writepage(). That would be | 1554 | * We don't honour synchronous mounts for writepage(). That would be |
1555 | * disastrous. Any write() or metadata operation will sync the fs for | 1555 | * disastrous. Any write() or metadata operation will sync the fs for |
1556 | * us. | 1556 | * us. |
1557 | * | 1557 | * |
1558 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | 1558 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, |
1559 | * we don't need to open a transaction here. | 1559 | * we don't need to open a transaction here. |
1560 | */ | 1560 | */ |
static int ext3_ordered_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *page_bufs;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));
	WARN_ON_ONCE(IS_RDONLY(inode));

	/*
	 * We give up here if we're reentered, because it might be for a
	 * different filesystem.
	 */
	if (ext3_journal_current_handle())
		goto out_fail;

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, inode->i_sb->s_blocksize,
				(1 << BH_Dirty)|(1 << BH_Uptodate));
		page_bufs = page_buffers(page);
	} else {
		page_bufs = page_buffers(page);
		if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE,
				       NULL, buffer_unmapped)) {
			/* Provide NULL get_block() to catch bugs if buffers
			 * weren't really mapped */
			return block_write_full_page(page, NULL, wbc);
		}
	}
	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));

	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	/* Pin the buffer_heads: the page may be unlocked (and truncated)
	 * once block_write_full_page() starts the I/O below. */
	walk_page_buffers(handle, page_bufs, 0,
			PAGE_CACHE_SIZE, NULL, bget_one);

	ret = block_write_full_page(page, ext3_get_block, wbc);

	/*
	 * The page can become unlocked at any point now, and
	 * truncate can then come in and change things.  So we
	 * can't touch *page from now on.  But *page_bufs is
	 * safe due to elevated refcount.
	 */

	/*
	 * And attach them to the current transaction.  But only if
	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
	 * and generally junk.
	 */
	if (ret == 0) {
		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
					NULL, journal_dirty_data_fn);
		if (!ret)
			ret = err;
	}
	/* Drop the references taken by bget_one() above. */
	walk_page_buffers(handle, page_bufs, 0,
			PAGE_CACHE_SIZE, NULL, bput_one);
	err = ext3_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	/* Leave the page dirty so writeback retries it later. */
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}
1635 | 1635 | ||
static int ext3_writeback_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));
	WARN_ON_ONCE(IS_RDONLY(inode));

	/* Reentry might be for a different filesystem - give up. */
	if (ext3_journal_current_handle())
		goto out_fail;

	if (page_has_buffers(page)) {
		if (!walk_page_buffers(NULL, page_buffers(page), 0,
				      PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
			/* Provide NULL get_block() to catch bugs if buffers
			 * weren't really mapped */
			return block_write_full_page(page, NULL, wbc);
		}
	}

	/* Open a transaction to cover any hole instantiation during I/O. */
	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	ret = block_write_full_page(page, ext3_get_block, wbc);

	err = ext3_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	/* Leave the page dirty so writeback retries it later. */
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}
1677 | 1677 | ||
static int ext3_journalled_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));
	WARN_ON_ONCE(IS_RDONLY(inode));

	/* Reentry might be for a different filesystem - give up. */
	if (ext3_journal_current_handle())
		goto no_write;

	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto no_write;
	}

	if (!page_has_buffers(page) || PageChecked(page)) {
		/*
		 * It's mmapped pagecache.  Add buffers and journal it.  There
		 * doesn't seem much point in redirtying the page here.
		 */
		ClearPageChecked(page);
		ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
					  ext3_get_block);
		if (ret != 0) {
			ext3_journal_stop(handle);
			goto out_unlock;
		}
		/* Get journal write access for every buffer, then file the
		 * whole page's buffers as journalled metadata. */
		ret = walk_page_buffers(handle, page_buffers(page), 0,
			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);

		err = walk_page_buffers(handle, page_buffers(page), 0,
				PAGE_CACHE_SIZE, NULL, write_end_fn);
		if (ret == 0)
			ret = err;
		ext3_set_inode_state(inode, EXT3_STATE_JDATA);
		unlock_page(page);
	} else {
		/*
		 * It may be a page full of checkpoint-mode buffers.  We don't
		 * really know unless we go poke around in the buffer_heads.
		 * But block_write_full_page will do the right thing.
		 */
		ret = block_write_full_page(page, ext3_get_block, wbc);
	}
	err = ext3_journal_stop(handle);
	if (!ret)
		ret = err;
out:
	return ret;

no_write:
	/* Leave the page dirty so writeback retries it later. */
	redirty_page_for_writepage(wbc, page);
out_unlock:
	unlock_page(page);
	goto out;
}
1739 | 1739 | ||
/* ->readpage: delegate to mpage with ext3's block mapper. */
static int ext3_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, ext3_get_block);
}
1744 | 1744 | ||
1745 | static int | 1745 | static int |
1746 | ext3_readpages(struct file *file, struct address_space *mapping, | 1746 | ext3_readpages(struct file *file, struct address_space *mapping, |
1747 | struct list_head *pages, unsigned nr_pages) | 1747 | struct list_head *pages, unsigned nr_pages) |
1748 | { | 1748 | { |
1749 | return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); | 1749 | return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); |
1750 | } | 1750 | } |
1751 | 1751 | ||
1752 | static void ext3_invalidatepage(struct page *page, unsigned long offset) | 1752 | static void ext3_invalidatepage(struct page *page, unsigned long offset) |
1753 | { | 1753 | { |
1754 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1754 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1755 | 1755 | ||
1756 | /* | 1756 | /* |
1757 | * If it's a full truncate we just forget about the pending dirtying | 1757 | * If it's a full truncate we just forget about the pending dirtying |
1758 | */ | 1758 | */ |
1759 | if (offset == 0) | 1759 | if (offset == 0) |
1760 | ClearPageChecked(page); | 1760 | ClearPageChecked(page); |
1761 | 1761 | ||
1762 | journal_invalidatepage(journal, page, offset); | 1762 | journal_invalidatepage(journal, page, offset); |
1763 | } | 1763 | } |
1764 | 1764 | ||
1765 | static int ext3_releasepage(struct page *page, gfp_t wait) | 1765 | static int ext3_releasepage(struct page *page, gfp_t wait) |
1766 | { | 1766 | { |
1767 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1767 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1768 | 1768 | ||
1769 | WARN_ON(PageChecked(page)); | 1769 | WARN_ON(PageChecked(page)); |
1770 | if (!page_has_buffers(page)) | 1770 | if (!page_has_buffers(page)) |
1771 | return 0; | 1771 | return 0; |
1772 | return journal_try_to_free_buffers(journal, page, wait); | 1772 | return journal_try_to_free_buffers(journal, page, wait); |
1773 | } | 1773 | } |
1774 | 1774 | ||
/*
 * If the O_DIRECT write will extend the file then add this inode to the
 * orphan list. So recovery will truncate it back to the original size
 * if the machine crashes during the write.
 *
 * If the O_DIRECT write is instantiating holes inside i_size and the machine
 * crashes then stale disk data _may_ be exposed inside the file. But current
 * VFS code falls back into buffered path in that case so we are safe.
 */
static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext3_inode_info *ei = EXT3_I(inode);
	handle_t *handle;
	ssize_t ret;
	int orphan = 0;			/* set once the inode is on the orphan list */
	size_t count = iov_length(iov, nr_segs);
	int retries = 0;

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		if (final_size > inode->i_size) {
			/* Credits for sb + inode write */
			handle = ext3_journal_start(inode, 2);
			if (IS_ERR(handle)) {
				ret = PTR_ERR(handle);
				goto out;
			}
			ret = ext3_orphan_add(handle, inode);
			if (ret) {
				ext3_journal_stop(handle);
				goto out;
			}
			orphan = 1;
			/*
			 * Remember the on-disk size before the DIO so a
			 * crash before the final i_size update truncates
			 * back to it.
			 */
			ei->i_disksize = inode->i_size;
			ext3_journal_stop(handle);
		}
	}

retry:
	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				 offset, nr_segs,
				 ext3_get_block, NULL);
	/*
	 * In case of error extending write may have instantiated a few
	 * blocks outside i_size. Trim these off again.
	 */
	if (unlikely((rw & WRITE) && ret < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = offset + iov_length(iov, nr_segs);

		if (end > isize)
			vmtruncate(inode, isize);
	}
	/* Transient ENOSPC may clear after a journal commit; retry then. */
	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (orphan) {
		int err;

		/* Credits for sb + inode write */
		handle = ext3_journal_start(inode, 2);
		if (IS_ERR(handle)) {
			/* This is really bad luck. We've written the data
			 * but cannot extend i_size. Truncate allocated blocks
			 * and pretend the write failed... */
			ext3_truncate(inode);
			ret = PTR_ERR(handle);
			goto out;
		}
		/* Unlinked-but-open files must stay on the orphan list. */
		if (inode->i_nlink)
			ext3_orphan_del(handle, inode);
		if (ret > 0) {
			loff_t end = offset + ret;
			if (end > inode->i_size) {
				ei->i_disksize = end;
				i_size_write(inode, end);
				/*
				 * We're going to return a positive `ret'
				 * here due to non-zero-length I/O, so there's
				 * no way of reporting error returns from
				 * ext3_mark_inode_dirty() to userspace. So
				 * ignore it.
				 */
				ext3_mark_inode_dirty(handle, inode);
			}
		}
		err = ext3_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	return ret;
}
1873 | 1873 | ||
/*
 * Pages can be marked dirty completely asynchronously from ext3's journalling
 * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do
 * much here because ->set_page_dirty is called under VFS locks. The page is
 * not necessarily locked.
 *
 * We cannot just dirty the page and leave attached buffers clean, because the
 * buffers' dirty state is "definitive". We cannot just set the buffers dirty
 * or jbddirty because all the journalling code will explode.
 *
 * So what we do is to mark the page "pending dirty" and next time writepage
 * is called, propagate that into the buffers appropriately.
 */
static int ext3_journalled_set_page_dirty(struct page *page)
{
	/* Record "pending dirty" first, then dirty the page itself. */
	SetPageChecked(page);
	return __set_page_dirty_nobuffers(page);
}
1892 | 1892 | ||
/*
 * Address-space operations used when ext3_should_order_data() selects
 * ordered mode in ext3_set_aops().
 */
static const struct address_space_operations ext3_ordered_aops = {
	.readpage = ext3_readpage,
	.readpages = ext3_readpages,
	.writepage = ext3_ordered_writepage,
	.write_begin = ext3_write_begin,
	.write_end = ext3_ordered_write_end,
	.bmap = ext3_bmap,
	.invalidatepage = ext3_invalidatepage,
	.releasepage = ext3_releasepage,
	.direct_IO = ext3_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};
1907 | 1907 | ||
/*
 * Address-space operations used when ext3_should_writeback_data() selects
 * writeback mode in ext3_set_aops(). Differs from the ordered table only
 * in the ->writepage / ->write_end callbacks.
 */
static const struct address_space_operations ext3_writeback_aops = {
	.readpage = ext3_readpage,
	.readpages = ext3_readpages,
	.writepage = ext3_writeback_writepage,
	.write_begin = ext3_write_begin,
	.write_end = ext3_writeback_write_end,
	.bmap = ext3_bmap,
	.invalidatepage = ext3_invalidatepage,
	.releasepage = ext3_releasepage,
	.direct_IO = ext3_direct_IO,
	.migratepage = buffer_migrate_page,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};
1922 | 1922 | ||
/*
 * Address-space operations for data=journal mode (the ext3_set_aops()
 * fallback). Note: no ->direct_IO here, and ->set_page_dirty is overridden
 * so dirtying is deferred to writepage (see ext3_journalled_set_page_dirty).
 */
static const struct address_space_operations ext3_journalled_aops = {
	.readpage = ext3_readpage,
	.readpages = ext3_readpages,
	.writepage = ext3_journalled_writepage,
	.write_begin = ext3_write_begin,
	.write_end = ext3_journalled_write_end,
	.set_page_dirty = ext3_journalled_set_page_dirty,
	.bmap = ext3_bmap,
	.invalidatepage = ext3_invalidatepage,
	.releasepage = ext3_releasepage,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_page = generic_error_remove_page,
};
1936 | 1936 | ||
1937 | void ext3_set_aops(struct inode *inode) | 1937 | void ext3_set_aops(struct inode *inode) |
1938 | { | 1938 | { |
1939 | if (ext3_should_order_data(inode)) | 1939 | if (ext3_should_order_data(inode)) |
1940 | inode->i_mapping->a_ops = &ext3_ordered_aops; | 1940 | inode->i_mapping->a_ops = &ext3_ordered_aops; |
1941 | else if (ext3_should_writeback_data(inode)) | 1941 | else if (ext3_should_writeback_data(inode)) |
1942 | inode->i_mapping->a_ops = &ext3_writeback_aops; | 1942 | inode->i_mapping->a_ops = &ext3_writeback_aops; |
1943 | else | 1943 | else |
1944 | inode->i_mapping->a_ops = &ext3_journalled_aops; | 1944 | inode->i_mapping->a_ops = &ext3_journalled_aops; |
1945 | } | 1945 | } |
1946 | 1946 | ||
/*
 * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
 * up to the end of the block which corresponds to `from'.
 * This required during truncate. We need to physically zero the tail end
 * of that block so it doesn't yield old data if the file is later grown.
 *
 * Called with @page locked; unlocks and releases it on all paths.
 */
static int ext3_block_truncate_page(handle_t *handle, struct page *page,
		struct address_space *mapping, loff_t from)
{
	ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct inode *inode = mapping->host;
	struct buffer_head *bh;
	int err = 0;

	blocksize = inode->i_sb->s_blocksize;
	/* Bytes to zero: from `offset' to the end of its block. */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (buffer_freed(bh)) {
		BUFFER_TRACE(bh, "freed: skip");
		goto unlock;
	}

	if (!buffer_mapped(bh)) {
		BUFFER_TRACE(bh, "unmapped");
		/* Look up the on-disk block (no allocation: create == 0). */
		ext3_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh)) {
			BUFFER_TRACE(bh, "still unmapped");
			goto unlock;
		}
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	/* data=journal: the buffer must join the transaction before we
	 * modify it. */
	if (ext3_should_journal_data(inode)) {
		BUFFER_TRACE(bh, "get write access");
		err = ext3_journal_get_write_access(handle, bh);
		if (err)
			goto unlock;
	}

	zero_user(page, offset, length);
	BUFFER_TRACE(bh, "zeroed end of block");

	err = 0;
	if (ext3_should_journal_data(inode)) {
		err = ext3_journal_dirty_metadata(handle, bh);
	} else {
		if (ext3_should_order_data(inode))
			err = ext3_journal_dirty_data(handle, bh);
		mark_buffer_dirty(bh);
	}

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
2032 | 2032 | ||
2033 | /* | 2033 | /* |
2034 | * Probably it should be a library function... search for first non-zero word | 2034 | * Probably it should be a library function... search for first non-zero word |
2035 | * or memcmp with zero_page, whatever is better for particular architecture. | 2035 | * or memcmp with zero_page, whatever is better for particular architecture. |
2036 | * Linus? | 2036 | * Linus? |
2037 | */ | 2037 | */ |
2038 | static inline int all_zeroes(__le32 *p, __le32 *q) | 2038 | static inline int all_zeroes(__le32 *p, __le32 *q) |
2039 | { | 2039 | { |
2040 | while (p < q) | 2040 | while (p < q) |
2041 | if (*p++) | 2041 | if (*p++) |
2042 | return 0; | 2042 | return 0; |
2043 | return 1; | 2043 | return 1; |
2044 | } | 2044 | } |
2045 | 2045 | ||
/**
 * ext3_find_shared - find the indirect blocks for partial truncation.
 * @inode: inode in question
 * @depth: depth of the affected branch
 * @offsets: offsets of pointers in that branch (see ext3_block_to_path)
 * @chain: place to store the pointers to partial indirect blocks
 * @top: place to the (detached) top of branch
 *
 * This is a helper function used by ext3_truncate().
 *
 * When we do truncate() we may have to clean the ends of several
 * indirect blocks but leave the blocks themselves alive. Block is
 * partially truncated if some data below the new i_size is referred
 * from it (and it is on the path to the first completely truncated
 * data block, indeed). We have to free the top of that path along
 * with everything to the right of the path. Since no allocation
 * past the truncation point is possible until ext3_truncate()
 * finishes, we may safely do the latter, but top of branch may
 * require special attention - pageout below the truncation point
 * might try to populate it.
 *
 * We atomically detach the top of branch from the tree, store the
 * block number of its root in *@top, pointers to buffer_heads of
 * partially truncated blocks - in @chain[].bh and pointers to
 * their last elements that should not be removed - in
 * @chain[].p. Return value is the pointer to last filled element
 * of @chain.
 *
 * The work left to caller to do the actual freeing of subtrees:
 *	a) free the subtree starting from *@top
 *	b) free the subtrees whose roots are stored in
 *		(@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *	c) free the subtrees growing from the inode past the @chain[0].
 *			(no partially truncated stuff there).  */

static Indirect *ext3_find_shared(struct inode *inode, int depth,
			int offsets[4], Indirect chain[4], __le32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	/* Make k index the deepest non-null offset + 1 */
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = ext3_get_branch(inode, k, offsets, chain, &err);
	/* Writer: pointers */
	if (!partial)
		partial = chain + k-1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	if (!partial->key && *partial->p)
		/* Writer: end */
		goto no_top;
	/* Walk back to the deepest block that must survive the truncate. */
	for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of branch is all ours and does not grow immediately from the inode
	 * it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		/* Nope, don't do this in ext3.  Must leave the tree intact */
#if 0
		*p->p = 0;
#endif
	}
	/* Writer: end */

	/* Drop buffer references below the detach point. */
	while(partial > p) {
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}
2128 | 2128 | ||
/*
 * Zero a number of block pointers in either an inode or an indirect block.
 * If we restart the transaction we must again get write access to the
 * indirect block for further modification.
 *
 * We release `count' blocks on disk, but (last - first) may be greater
 * than `count' because there can be holes in there.
 *
 * @bh may be NULL when @first/@last point into the inode itself rather
 * than an indirect block.
 */
static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
		struct buffer_head *bh, ext3_fsblk_t block_to_free,
		unsigned long count, __le32 *first, __le32 *last)
{
	__le32 *p;
	/* Running low on credits: flush what we have and restart. */
	if (try_to_extend_transaction(handle, inode)) {
		if (bh) {
			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
			if (ext3_journal_dirty_metadata(handle, bh))
				return;
		}
		ext3_mark_inode_dirty(handle, inode);
		truncate_restart_transaction(handle, inode);
		if (bh) {
			BUFFER_TRACE(bh, "retaking write access");
			if (ext3_journal_get_write_access(handle, bh))
				return;
		}
	}

	/*
	 * Any buffers which are on the journal will be in memory. We find
	 * them on the hash table so journal_revoke() will run journal_forget()
	 * on them.  We've already detached each block from the file, so
	 * bforget() in journal_forget() should be safe.
	 *
	 * AKPM: turn on bforget in journal_forget()!!!
	 */
	for (p = first; p < last; p++) {
		u32 nr = le32_to_cpu(*p);
		if (nr) {
			struct buffer_head *bh;

			*p = 0;
			bh = sb_find_get_block(inode->i_sb, nr);
			ext3_forget(handle, 0, inode, bh, nr);
		}
	}

	ext3_free_blocks(handle, inode, block_to_free, count);
}
2178 | 2178 | ||
2179 | /** | 2179 | /** |
2180 | * ext3_free_data - free a list of data blocks | 2180 | * ext3_free_data - free a list of data blocks |
2181 | * @handle: handle for this transaction | 2181 | * @handle: handle for this transaction |
2182 | * @inode: inode we are dealing with | 2182 | * @inode: inode we are dealing with |
2183 | * @this_bh: indirect buffer_head which contains *@first and *@last | 2183 | * @this_bh: indirect buffer_head which contains *@first and *@last |
2184 | * @first: array of block numbers | 2184 | * @first: array of block numbers |
2185 | * @last: points immediately past the end of array | 2185 | * @last: points immediately past the end of array |
2186 | * | 2186 | * |
2187 | * We are freeing all blocks referred from that array (numbers are stored as | 2187 | * We are freeing all blocks referred from that array (numbers are stored as |
2188 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | 2188 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. |
2189 | * | 2189 | * |
2190 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | 2190 | * We accumulate contiguous runs of blocks to free. Conveniently, if these |
2191 | * blocks are contiguous then releasing them at one time will only affect one | 2191 | * blocks are contiguous then releasing them at one time will only affect one |
2192 | * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't | 2192 | * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't |
2193 | * actually use a lot of journal space. | 2193 | * actually use a lot of journal space. |
2194 | * | 2194 | * |
2195 | * @this_bh will be %NULL if @first and @last point into the inode's direct | 2195 | * @this_bh will be %NULL if @first and @last point into the inode's direct |
2196 | * block pointers. | 2196 | * block pointers. |
2197 | */ | 2197 | */ |
2198 | static void ext3_free_data(handle_t *handle, struct inode *inode, | 2198 | static void ext3_free_data(handle_t *handle, struct inode *inode, |
2199 | struct buffer_head *this_bh, | 2199 | struct buffer_head *this_bh, |
2200 | __le32 *first, __le32 *last) | 2200 | __le32 *first, __le32 *last) |
2201 | { | 2201 | { |
2202 | ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ | 2202 | ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ |
2203 | unsigned long count = 0; /* Number of blocks in the run */ | 2203 | unsigned long count = 0; /* Number of blocks in the run */ |
2204 | __le32 *block_to_free_p = NULL; /* Pointer into inode/ind | 2204 | __le32 *block_to_free_p = NULL; /* Pointer into inode/ind |
2205 | corresponding to | 2205 | corresponding to |
2206 | block_to_free */ | 2206 | block_to_free */ |
2207 | ext3_fsblk_t nr; /* Current block # */ | 2207 | ext3_fsblk_t nr; /* Current block # */ |
2208 | __le32 *p; /* Pointer into inode/ind | 2208 | __le32 *p; /* Pointer into inode/ind |
2209 | for current block */ | 2209 | for current block */ |
2210 | int err; | 2210 | int err; |
2211 | 2211 | ||
2212 | if (this_bh) { /* For indirect block */ | 2212 | if (this_bh) { /* For indirect block */ |
2213 | BUFFER_TRACE(this_bh, "get_write_access"); | 2213 | BUFFER_TRACE(this_bh, "get_write_access"); |
2214 | err = ext3_journal_get_write_access(handle, this_bh); | 2214 | err = ext3_journal_get_write_access(handle, this_bh); |
2215 | /* Important: if we can't update the indirect pointers | 2215 | /* Important: if we can't update the indirect pointers |
2216 | * to the blocks, we can't free them. */ | 2216 | * to the blocks, we can't free them. */ |
2217 | if (err) | 2217 | if (err) |
2218 | return; | 2218 | return; |
2219 | } | 2219 | } |
2220 | 2220 | ||
2221 | for (p = first; p < last; p++) { | 2221 | for (p = first; p < last; p++) { |
2222 | nr = le32_to_cpu(*p); | 2222 | nr = le32_to_cpu(*p); |
2223 | if (nr) { | 2223 | if (nr) { |
2224 | /* accumulate blocks to free if they're contiguous */ | 2224 | /* accumulate blocks to free if they're contiguous */ |
2225 | if (count == 0) { | 2225 | if (count == 0) { |
2226 | block_to_free = nr; | 2226 | block_to_free = nr; |
2227 | block_to_free_p = p; | 2227 | block_to_free_p = p; |
2228 | count = 1; | 2228 | count = 1; |
2229 | } else if (nr == block_to_free + count) { | 2229 | } else if (nr == block_to_free + count) { |
2230 | count++; | 2230 | count++; |
2231 | } else { | 2231 | } else { |
2232 | ext3_clear_blocks(handle, inode, this_bh, | 2232 | ext3_clear_blocks(handle, inode, this_bh, |
2233 | block_to_free, | 2233 | block_to_free, |
2234 | count, block_to_free_p, p); | 2234 | count, block_to_free_p, p); |
2235 | block_to_free = nr; | 2235 | block_to_free = nr; |
2236 | block_to_free_p = p; | 2236 | block_to_free_p = p; |
2237 | count = 1; | 2237 | count = 1; |
2238 | } | 2238 | } |
2239 | } | 2239 | } |
2240 | } | 2240 | } |
2241 | 2241 | ||
2242 | if (count > 0) | 2242 | if (count > 0) |
2243 | ext3_clear_blocks(handle, inode, this_bh, block_to_free, | 2243 | ext3_clear_blocks(handle, inode, this_bh, block_to_free, |
2244 | count, block_to_free_p, p); | 2244 | count, block_to_free_p, p); |
2245 | 2245 | ||
2246 | if (this_bh) { | 2246 | if (this_bh) { |
2247 | BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); | 2247 | BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); |
2248 | 2248 | ||
2249 | /* | 2249 | /* |
2250 | * The buffer head should have an attached journal head at this | 2250 | * The buffer head should have an attached journal head at this |
2251 | * point. However, if the data is corrupted and an indirect | 2251 | * point. However, if the data is corrupted and an indirect |
2252 | * block pointed to itself, it would have been detached when | 2252 | * block pointed to itself, it would have been detached when |
2253 | * the block was cleared. Check for this instead of OOPSing. | 2253 | * the block was cleared. Check for this instead of OOPSing. |
2254 | */ | 2254 | */ |
2255 | if (bh2jh(this_bh)) | 2255 | if (bh2jh(this_bh)) |
2256 | ext3_journal_dirty_metadata(handle, this_bh); | 2256 | ext3_journal_dirty_metadata(handle, this_bh); |
2257 | else | 2257 | else |
2258 | ext3_error(inode->i_sb, "ext3_free_data", | 2258 | ext3_error(inode->i_sb, "ext3_free_data", |
2259 | "circular indirect block detected, " | 2259 | "circular indirect block detected, " |
2260 | "inode=%lu, block=%llu", | 2260 | "inode=%lu, block=%llu", |
2261 | inode->i_ino, | 2261 | inode->i_ino, |
2262 | (unsigned long long)this_bh->b_blocknr); | 2262 | (unsigned long long)this_bh->b_blocknr); |
2263 | } | 2263 | } |
2264 | } | 2264 | } |
2265 | 2265 | ||
2266 | /** | 2266 | /** |
2267 | * ext3_free_branches - free an array of branches | 2267 | * ext3_free_branches - free an array of branches |
2268 | * @handle: JBD handle for this transaction | 2268 | * @handle: JBD handle for this transaction |
2269 | * @inode: inode we are dealing with | 2269 | * @inode: inode we are dealing with |
2270 | * @parent_bh: the buffer_head which contains *@first and *@last | 2270 | * @parent_bh: the buffer_head which contains *@first and *@last |
2271 | * @first: array of block numbers | 2271 | * @first: array of block numbers |
2272 | * @last: pointer immediately past the end of array | 2272 | * @last: pointer immediately past the end of array |
2273 | * @depth: depth of the branches to free | 2273 | * @depth: depth of the branches to free |
2274 | * | 2274 | * |
2275 | * We are freeing all blocks referred from these branches (numbers are | 2275 | * We are freeing all blocks referred from these branches (numbers are |
2276 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 2276 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
2277 | * appropriately. | 2277 | * appropriately. |
2278 | */ | 2278 | */ |
2279 | static void ext3_free_branches(handle_t *handle, struct inode *inode, | 2279 | static void ext3_free_branches(handle_t *handle, struct inode *inode, |
2280 | struct buffer_head *parent_bh, | 2280 | struct buffer_head *parent_bh, |
2281 | __le32 *first, __le32 *last, int depth) | 2281 | __le32 *first, __le32 *last, int depth) |
2282 | { | 2282 | { |
2283 | ext3_fsblk_t nr; | 2283 | ext3_fsblk_t nr; |
2284 | __le32 *p; | 2284 | __le32 *p; |
2285 | 2285 | ||
2286 | if (is_handle_aborted(handle)) | 2286 | if (is_handle_aborted(handle)) |
2287 | return; | 2287 | return; |
2288 | 2288 | ||
2289 | if (depth--) { | 2289 | if (depth--) { |
2290 | struct buffer_head *bh; | 2290 | struct buffer_head *bh; |
2291 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); | 2291 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); |
2292 | p = last; | 2292 | p = last; |
2293 | while (--p >= first) { | 2293 | while (--p >= first) { |
2294 | nr = le32_to_cpu(*p); | 2294 | nr = le32_to_cpu(*p); |
2295 | if (!nr) | 2295 | if (!nr) |
2296 | continue; /* A hole */ | 2296 | continue; /* A hole */ |
2297 | 2297 | ||
2298 | /* Go read the buffer for the next level down */ | 2298 | /* Go read the buffer for the next level down */ |
2299 | bh = sb_bread(inode->i_sb, nr); | 2299 | bh = sb_bread(inode->i_sb, nr); |
2300 | 2300 | ||
2301 | /* | 2301 | /* |
2302 | * A read failure? Report error and clear slot | 2302 | * A read failure? Report error and clear slot |
2303 | * (should be rare). | 2303 | * (should be rare). |
2304 | */ | 2304 | */ |
2305 | if (!bh) { | 2305 | if (!bh) { |
2306 | ext3_error(inode->i_sb, "ext3_free_branches", | 2306 | ext3_error(inode->i_sb, "ext3_free_branches", |
2307 | "Read failure, inode=%lu, block="E3FSBLK, | 2307 | "Read failure, inode=%lu, block="E3FSBLK, |
2308 | inode->i_ino, nr); | 2308 | inode->i_ino, nr); |
2309 | continue; | 2309 | continue; |
2310 | } | 2310 | } |
2311 | 2311 | ||
2312 | /* This zaps the entire block. Bottom up. */ | 2312 | /* This zaps the entire block. Bottom up. */ |
2313 | BUFFER_TRACE(bh, "free child branches"); | 2313 | BUFFER_TRACE(bh, "free child branches"); |
2314 | ext3_free_branches(handle, inode, bh, | 2314 | ext3_free_branches(handle, inode, bh, |
2315 | (__le32*)bh->b_data, | 2315 | (__le32*)bh->b_data, |
2316 | (__le32*)bh->b_data + addr_per_block, | 2316 | (__le32*)bh->b_data + addr_per_block, |
2317 | depth); | 2317 | depth); |
2318 | 2318 | ||
2319 | /* | 2319 | /* |
2320 | * Everything below this this pointer has been | 2320 | * Everything below this this pointer has been |
2321 | * released. Now let this top-of-subtree go. | 2321 | * released. Now let this top-of-subtree go. |
2322 | * | 2322 | * |
2323 | * We want the freeing of this indirect block to be | 2323 | * We want the freeing of this indirect block to be |
2324 | * atomic in the journal with the updating of the | 2324 | * atomic in the journal with the updating of the |
2325 | * bitmap block which owns it. So make some room in | 2325 | * bitmap block which owns it. So make some room in |
2326 | * the journal. | 2326 | * the journal. |
2327 | * | 2327 | * |
2328 | * We zero the parent pointer *after* freeing its | 2328 | * We zero the parent pointer *after* freeing its |
2329 | * pointee in the bitmaps, so if extend_transaction() | 2329 | * pointee in the bitmaps, so if extend_transaction() |
2330 | * for some reason fails to put the bitmap changes and | 2330 | * for some reason fails to put the bitmap changes and |
2331 | * the release into the same transaction, recovery | 2331 | * the release into the same transaction, recovery |
2332 | * will merely complain about releasing a free block, | 2332 | * will merely complain about releasing a free block, |
2333 | * rather than leaking blocks. | 2333 | * rather than leaking blocks. |
2334 | */ | 2334 | */ |
2335 | if (is_handle_aborted(handle)) | 2335 | if (is_handle_aborted(handle)) |
2336 | return; | 2336 | return; |
2337 | if (try_to_extend_transaction(handle, inode)) { | 2337 | if (try_to_extend_transaction(handle, inode)) { |
2338 | ext3_mark_inode_dirty(handle, inode); | 2338 | ext3_mark_inode_dirty(handle, inode); |
2339 | truncate_restart_transaction(handle, inode); | 2339 | truncate_restart_transaction(handle, inode); |
2340 | } | 2340 | } |
2341 | 2341 | ||
2342 | /* | 2342 | /* |
2343 | * We've probably journalled the indirect block several | 2343 | * We've probably journalled the indirect block several |
2344 | * times during the truncate. But it's no longer | 2344 | * times during the truncate. But it's no longer |
2345 | * needed and we now drop it from the transaction via | 2345 | * needed and we now drop it from the transaction via |
2346 | * journal_revoke(). | 2346 | * journal_revoke(). |
2347 | * | 2347 | * |
2348 | * That's easy if it's exclusively part of this | 2348 | * That's easy if it's exclusively part of this |
2349 | * transaction. But if it's part of the committing | 2349 | * transaction. But if it's part of the committing |
2350 | * transaction then journal_forget() will simply | 2350 | * transaction then journal_forget() will simply |
2351 | * brelse() it. That means that if the underlying | 2351 | * brelse() it. That means that if the underlying |
2352 | * block is reallocated in ext3_get_block(), | 2352 | * block is reallocated in ext3_get_block(), |
2353 | * unmap_underlying_metadata() will find this block | 2353 | * unmap_underlying_metadata() will find this block |
2354 | * and will try to get rid of it. damn, damn. Thus | 2354 | * and will try to get rid of it. damn, damn. Thus |
2355 | * we don't allow a block to be reallocated until | 2355 | * we don't allow a block to be reallocated until |
2356 | * a transaction freeing it has fully committed. | 2356 | * a transaction freeing it has fully committed. |
2357 | * | 2357 | * |
2358 | * We also have to make sure journal replay after a | 2358 | * We also have to make sure journal replay after a |
2359 | * crash does not overwrite non-journaled data blocks | 2359 | * crash does not overwrite non-journaled data blocks |
2360 | * with old metadata when the block got reallocated for | 2360 | * with old metadata when the block got reallocated for |
2361 | * data. Thus we have to store a revoke record for a | 2361 | * data. Thus we have to store a revoke record for a |
2362 | * block in the same transaction in which we free the | 2362 | * block in the same transaction in which we free the |
2363 | * block. | 2363 | * block. |
2364 | */ | 2364 | */ |
2365 | ext3_forget(handle, 1, inode, bh, bh->b_blocknr); | 2365 | ext3_forget(handle, 1, inode, bh, bh->b_blocknr); |
2366 | 2366 | ||
2367 | ext3_free_blocks(handle, inode, nr, 1); | 2367 | ext3_free_blocks(handle, inode, nr, 1); |
2368 | 2368 | ||
2369 | if (parent_bh) { | 2369 | if (parent_bh) { |
2370 | /* | 2370 | /* |
2371 | * The block which we have just freed is | 2371 | * The block which we have just freed is |
2372 | * pointed to by an indirect block: journal it | 2372 | * pointed to by an indirect block: journal it |
2373 | */ | 2373 | */ |
2374 | BUFFER_TRACE(parent_bh, "get_write_access"); | 2374 | BUFFER_TRACE(parent_bh, "get_write_access"); |
2375 | if (!ext3_journal_get_write_access(handle, | 2375 | if (!ext3_journal_get_write_access(handle, |
2376 | parent_bh)){ | 2376 | parent_bh)){ |
2377 | *p = 0; | 2377 | *p = 0; |
2378 | BUFFER_TRACE(parent_bh, | 2378 | BUFFER_TRACE(parent_bh, |
2379 | "call ext3_journal_dirty_metadata"); | 2379 | "call ext3_journal_dirty_metadata"); |
2380 | ext3_journal_dirty_metadata(handle, | 2380 | ext3_journal_dirty_metadata(handle, |
2381 | parent_bh); | 2381 | parent_bh); |
2382 | } | 2382 | } |
2383 | } | 2383 | } |
2384 | } | 2384 | } |
2385 | } else { | 2385 | } else { |
2386 | /* We have reached the bottom of the tree. */ | 2386 | /* We have reached the bottom of the tree. */ |
2387 | BUFFER_TRACE(parent_bh, "free data blocks"); | 2387 | BUFFER_TRACE(parent_bh, "free data blocks"); |
2388 | ext3_free_data(handle, inode, parent_bh, first, last); | 2388 | ext3_free_data(handle, inode, parent_bh, first, last); |
2389 | } | 2389 | } |
2390 | } | 2390 | } |
2391 | 2391 | ||
2392 | int ext3_can_truncate(struct inode *inode) | 2392 | int ext3_can_truncate(struct inode *inode) |
2393 | { | 2393 | { |
2394 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 2394 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
2395 | return 0; | 2395 | return 0; |
2396 | if (S_ISREG(inode->i_mode)) | 2396 | if (S_ISREG(inode->i_mode)) |
2397 | return 1; | 2397 | return 1; |
2398 | if (S_ISDIR(inode->i_mode)) | 2398 | if (S_ISDIR(inode->i_mode)) |
2399 | return 1; | 2399 | return 1; |
2400 | if (S_ISLNK(inode->i_mode)) | 2400 | if (S_ISLNK(inode->i_mode)) |
2401 | return !ext3_inode_is_fast_symlink(inode); | 2401 | return !ext3_inode_is_fast_symlink(inode); |
2402 | return 0; | 2402 | return 0; |
2403 | } | 2403 | } |
2404 | 2404 | ||
2405 | /* | 2405 | /* |
2406 | * ext3_truncate() | 2406 | * ext3_truncate() |
2407 | * | 2407 | * |
2408 | * We block out ext3_get_block() block instantiations across the entire | 2408 | * We block out ext3_get_block() block instantiations across the entire |
2409 | * transaction, and VFS/VM ensures that ext3_truncate() cannot run | 2409 | * transaction, and VFS/VM ensures that ext3_truncate() cannot run |
2410 | * simultaneously on behalf of the same inode. | 2410 | * simultaneously on behalf of the same inode. |
2411 | * | 2411 | * |
2412 | * As we work through the truncate and commmit bits of it to the journal there | 2412 | * As we work through the truncate and commmit bits of it to the journal there |
2413 | * is one core, guiding principle: the file's tree must always be consistent on | 2413 | * is one core, guiding principle: the file's tree must always be consistent on |
2414 | * disk. We must be able to restart the truncate after a crash. | 2414 | * disk. We must be able to restart the truncate after a crash. |
2415 | * | 2415 | * |
2416 | * The file's tree may be transiently inconsistent in memory (although it | 2416 | * The file's tree may be transiently inconsistent in memory (although it |
2417 | * probably isn't), but whenever we close off and commit a journal transaction, | 2417 | * probably isn't), but whenever we close off and commit a journal transaction, |
2418 | * the contents of (the filesystem + the journal) must be consistent and | 2418 | * the contents of (the filesystem + the journal) must be consistent and |
2419 | * restartable. It's pretty simple, really: bottom up, right to left (although | 2419 | * restartable. It's pretty simple, really: bottom up, right to left (although |
2420 | * left-to-right works OK too). | 2420 | * left-to-right works OK too). |
2421 | * | 2421 | * |
2422 | * Note that at recovery time, journal replay occurs *before* the restart of | 2422 | * Note that at recovery time, journal replay occurs *before* the restart of |
2423 | * truncate against the orphan inode list. | 2423 | * truncate against the orphan inode list. |
2424 | * | 2424 | * |
2425 | * The committed inode has the new, desired i_size (which is the same as | 2425 | * The committed inode has the new, desired i_size (which is the same as |
2426 | * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see | 2426 | * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see |
2427 | * that this inode's truncate did not complete and it will again call | 2427 | * that this inode's truncate did not complete and it will again call |
2428 | * ext3_truncate() to have another go. So there will be instantiated blocks | 2428 | * ext3_truncate() to have another go. So there will be instantiated blocks |
2429 | * to the right of the truncation point in a crashed ext3 filesystem. But | 2429 | * to the right of the truncation point in a crashed ext3 filesystem. But |
2430 | * that's fine - as long as they are linked from the inode, the post-crash | 2430 | * that's fine - as long as they are linked from the inode, the post-crash |
2431 | * ext3_truncate() run will find them and release them. | 2431 | * ext3_truncate() run will find them and release them. |
2432 | */ | 2432 | */ |
2433 | void ext3_truncate(struct inode *inode) | 2433 | void ext3_truncate(struct inode *inode) |
2434 | { | 2434 | { |
2435 | handle_t *handle; | 2435 | handle_t *handle; |
2436 | struct ext3_inode_info *ei = EXT3_I(inode); | 2436 | struct ext3_inode_info *ei = EXT3_I(inode); |
2437 | __le32 *i_data = ei->i_data; | 2437 | __le32 *i_data = ei->i_data; |
2438 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); | 2438 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); |
2439 | struct address_space *mapping = inode->i_mapping; | 2439 | struct address_space *mapping = inode->i_mapping; |
2440 | int offsets[4]; | 2440 | int offsets[4]; |
2441 | Indirect chain[4]; | 2441 | Indirect chain[4]; |
2442 | Indirect *partial; | 2442 | Indirect *partial; |
2443 | __le32 nr = 0; | 2443 | __le32 nr = 0; |
2444 | int n; | 2444 | int n; |
2445 | long last_block; | 2445 | long last_block; |
2446 | unsigned blocksize = inode->i_sb->s_blocksize; | 2446 | unsigned blocksize = inode->i_sb->s_blocksize; |
2447 | struct page *page; | 2447 | struct page *page; |
2448 | 2448 | ||
2449 | if (!ext3_can_truncate(inode)) | 2449 | if (!ext3_can_truncate(inode)) |
2450 | goto out_notrans; | 2450 | goto out_notrans; |
2451 | 2451 | ||
2452 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) | 2452 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) |
2453 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); | 2453 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); |
2454 | 2454 | ||
2455 | /* | 2455 | /* |
2456 | * We have to lock the EOF page here, because lock_page() nests | 2456 | * We have to lock the EOF page here, because lock_page() nests |
2457 | * outside journal_start(). | 2457 | * outside journal_start(). |
2458 | */ | 2458 | */ |
2459 | if ((inode->i_size & (blocksize - 1)) == 0) { | 2459 | if ((inode->i_size & (blocksize - 1)) == 0) { |
2460 | /* Block boundary? Nothing to do */ | 2460 | /* Block boundary? Nothing to do */ |
2461 | page = NULL; | 2461 | page = NULL; |
2462 | } else { | 2462 | } else { |
2463 | page = grab_cache_page(mapping, | 2463 | page = grab_cache_page(mapping, |
2464 | inode->i_size >> PAGE_CACHE_SHIFT); | 2464 | inode->i_size >> PAGE_CACHE_SHIFT); |
2465 | if (!page) | 2465 | if (!page) |
2466 | goto out_notrans; | 2466 | goto out_notrans; |
2467 | } | 2467 | } |
2468 | 2468 | ||
2469 | handle = start_transaction(inode); | 2469 | handle = start_transaction(inode); |
2470 | if (IS_ERR(handle)) { | 2470 | if (IS_ERR(handle)) { |
2471 | if (page) { | 2471 | if (page) { |
2472 | clear_highpage(page); | 2472 | clear_highpage(page); |
2473 | flush_dcache_page(page); | 2473 | flush_dcache_page(page); |
2474 | unlock_page(page); | 2474 | unlock_page(page); |
2475 | page_cache_release(page); | 2475 | page_cache_release(page); |
2476 | } | 2476 | } |
2477 | goto out_notrans; | 2477 | goto out_notrans; |
2478 | } | 2478 | } |
2479 | 2479 | ||
2480 | last_block = (inode->i_size + blocksize-1) | 2480 | last_block = (inode->i_size + blocksize-1) |
2481 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); | 2481 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); |
2482 | 2482 | ||
2483 | if (page) | 2483 | if (page) |
2484 | ext3_block_truncate_page(handle, page, mapping, inode->i_size); | 2484 | ext3_block_truncate_page(handle, page, mapping, inode->i_size); |
2485 | 2485 | ||
2486 | n = ext3_block_to_path(inode, last_block, offsets, NULL); | 2486 | n = ext3_block_to_path(inode, last_block, offsets, NULL); |
2487 | if (n == 0) | 2487 | if (n == 0) |
2488 | goto out_stop; /* error */ | 2488 | goto out_stop; /* error */ |
2489 | 2489 | ||
2490 | /* | 2490 | /* |
2491 | * OK. This truncate is going to happen. We add the inode to the | 2491 | * OK. This truncate is going to happen. We add the inode to the |
2492 | * orphan list, so that if this truncate spans multiple transactions, | 2492 | * orphan list, so that if this truncate spans multiple transactions, |
2493 | * and we crash, we will resume the truncate when the filesystem | 2493 | * and we crash, we will resume the truncate when the filesystem |
2494 | * recovers. It also marks the inode dirty, to catch the new size. | 2494 | * recovers. It also marks the inode dirty, to catch the new size. |
2495 | * | 2495 | * |
2496 | * Implication: the file must always be in a sane, consistent | 2496 | * Implication: the file must always be in a sane, consistent |
2497 | * truncatable state while each transaction commits. | 2497 | * truncatable state while each transaction commits. |
2498 | */ | 2498 | */ |
2499 | if (ext3_orphan_add(handle, inode)) | 2499 | if (ext3_orphan_add(handle, inode)) |
2500 | goto out_stop; | 2500 | goto out_stop; |
2501 | 2501 | ||
2502 | /* | 2502 | /* |
2503 | * The orphan list entry will now protect us from any crash which | 2503 | * The orphan list entry will now protect us from any crash which |
2504 | * occurs before the truncate completes, so it is now safe to propagate | 2504 | * occurs before the truncate completes, so it is now safe to propagate |
2505 | * the new, shorter inode size (held for now in i_size) into the | 2505 | * the new, shorter inode size (held for now in i_size) into the |
2506 | * on-disk inode. We do this via i_disksize, which is the value which | 2506 | * on-disk inode. We do this via i_disksize, which is the value which |
2507 | * ext3 *really* writes onto the disk inode. | 2507 | * ext3 *really* writes onto the disk inode. |
2508 | */ | 2508 | */ |
2509 | ei->i_disksize = inode->i_size; | 2509 | ei->i_disksize = inode->i_size; |
2510 | 2510 | ||
2511 | /* | 2511 | /* |
2512 | * From here we block out all ext3_get_block() callers who want to | 2512 | * From here we block out all ext3_get_block() callers who want to |
2513 | * modify the block allocation tree. | 2513 | * modify the block allocation tree. |
2514 | */ | 2514 | */ |
2515 | mutex_lock(&ei->truncate_mutex); | 2515 | mutex_lock(&ei->truncate_mutex); |
2516 | 2516 | ||
2517 | if (n == 1) { /* direct blocks */ | 2517 | if (n == 1) { /* direct blocks */ |
2518 | ext3_free_data(handle, inode, NULL, i_data+offsets[0], | 2518 | ext3_free_data(handle, inode, NULL, i_data+offsets[0], |
2519 | i_data + EXT3_NDIR_BLOCKS); | 2519 | i_data + EXT3_NDIR_BLOCKS); |
2520 | goto do_indirects; | 2520 | goto do_indirects; |
2521 | } | 2521 | } |
2522 | 2522 | ||
2523 | partial = ext3_find_shared(inode, n, offsets, chain, &nr); | 2523 | partial = ext3_find_shared(inode, n, offsets, chain, &nr); |
2524 | /* Kill the top of shared branch (not detached) */ | 2524 | /* Kill the top of shared branch (not detached) */ |
2525 | if (nr) { | 2525 | if (nr) { |
2526 | if (partial == chain) { | 2526 | if (partial == chain) { |
2527 | /* Shared branch grows from the inode */ | 2527 | /* Shared branch grows from the inode */ |
2528 | ext3_free_branches(handle, inode, NULL, | 2528 | ext3_free_branches(handle, inode, NULL, |
2529 | &nr, &nr+1, (chain+n-1) - partial); | 2529 | &nr, &nr+1, (chain+n-1) - partial); |
2530 | *partial->p = 0; | 2530 | *partial->p = 0; |
2531 | /* | 2531 | /* |
2532 | * We mark the inode dirty prior to restart, | 2532 | * We mark the inode dirty prior to restart, |
2533 | * and prior to stop. No need for it here. | 2533 | * and prior to stop. No need for it here. |
2534 | */ | 2534 | */ |
2535 | } else { | 2535 | } else { |
2536 | /* Shared branch grows from an indirect block */ | 2536 | /* Shared branch grows from an indirect block */ |
2537 | ext3_free_branches(handle, inode, partial->bh, | 2537 | ext3_free_branches(handle, inode, partial->bh, |
2538 | partial->p, | 2538 | partial->p, |
2539 | partial->p+1, (chain+n-1) - partial); | 2539 | partial->p+1, (chain+n-1) - partial); |
2540 | } | 2540 | } |
2541 | } | 2541 | } |
2542 | /* Clear the ends of indirect blocks on the shared branch */ | 2542 | /* Clear the ends of indirect blocks on the shared branch */ |
2543 | while (partial > chain) { | 2543 | while (partial > chain) { |
2544 | ext3_free_branches(handle, inode, partial->bh, partial->p + 1, | 2544 | ext3_free_branches(handle, inode, partial->bh, partial->p + 1, |
2545 | (__le32*)partial->bh->b_data+addr_per_block, | 2545 | (__le32*)partial->bh->b_data+addr_per_block, |
2546 | (chain+n-1) - partial); | 2546 | (chain+n-1) - partial); |
2547 | BUFFER_TRACE(partial->bh, "call brelse"); | 2547 | BUFFER_TRACE(partial->bh, "call brelse"); |
2548 | brelse (partial->bh); | 2548 | brelse (partial->bh); |
2549 | partial--; | 2549 | partial--; |
2550 | } | 2550 | } |
2551 | do_indirects: | 2551 | do_indirects: |
2552 | /* Kill the remaining (whole) subtrees */ | 2552 | /* Kill the remaining (whole) subtrees */ |
2553 | switch (offsets[0]) { | 2553 | switch (offsets[0]) { |
2554 | default: | 2554 | default: |
2555 | nr = i_data[EXT3_IND_BLOCK]; | 2555 | nr = i_data[EXT3_IND_BLOCK]; |
2556 | if (nr) { | 2556 | if (nr) { |
2557 | ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | 2557 | ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); |
2558 | i_data[EXT3_IND_BLOCK] = 0; | 2558 | i_data[EXT3_IND_BLOCK] = 0; |
2559 | } | 2559 | } |
2560 | case EXT3_IND_BLOCK: | 2560 | case EXT3_IND_BLOCK: |
2561 | nr = i_data[EXT3_DIND_BLOCK]; | 2561 | nr = i_data[EXT3_DIND_BLOCK]; |
2562 | if (nr) { | 2562 | if (nr) { |
2563 | ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | 2563 | ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); |
2564 | i_data[EXT3_DIND_BLOCK] = 0; | 2564 | i_data[EXT3_DIND_BLOCK] = 0; |
2565 | } | 2565 | } |
2566 | case EXT3_DIND_BLOCK: | 2566 | case EXT3_DIND_BLOCK: |
2567 | nr = i_data[EXT3_TIND_BLOCK]; | 2567 | nr = i_data[EXT3_TIND_BLOCK]; |
2568 | if (nr) { | 2568 | if (nr) { |
2569 | ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | 2569 | ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); |
2570 | i_data[EXT3_TIND_BLOCK] = 0; | 2570 | i_data[EXT3_TIND_BLOCK] = 0; |
2571 | } | 2571 | } |
2572 | case EXT3_TIND_BLOCK: | 2572 | case EXT3_TIND_BLOCK: |
2573 | ; | 2573 | ; |
2574 | } | 2574 | } |
2575 | 2575 | ||
2576 | ext3_discard_reservation(inode); | 2576 | ext3_discard_reservation(inode); |
2577 | 2577 | ||
2578 | mutex_unlock(&ei->truncate_mutex); | 2578 | mutex_unlock(&ei->truncate_mutex); |
2579 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 2579 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
2580 | ext3_mark_inode_dirty(handle, inode); | 2580 | ext3_mark_inode_dirty(handle, inode); |
2581 | 2581 | ||
2582 | /* | 2582 | /* |
2583 | * In a multi-transaction truncate, we only make the final transaction | 2583 | * In a multi-transaction truncate, we only make the final transaction |
2584 | * synchronous | 2584 | * synchronous |
2585 | */ | 2585 | */ |
2586 | if (IS_SYNC(inode)) | 2586 | if (IS_SYNC(inode)) |
2587 | handle->h_sync = 1; | 2587 | handle->h_sync = 1; |
2588 | out_stop: | 2588 | out_stop: |
2589 | /* | 2589 | /* |
2590 | * If this was a simple ftruncate(), and the file will remain alive | 2590 | * If this was a simple ftruncate(), and the file will remain alive |
2591 | * then we need to clear up the orphan record which we created above. | 2591 | * then we need to clear up the orphan record which we created above. |
2592 | * However, if this was a real unlink then we were called by | 2592 | * However, if this was a real unlink then we were called by |
2593 | * ext3_evict_inode(), and we allow that function to clean up the | 2593 | * ext3_evict_inode(), and we allow that function to clean up the |
2594 | * orphan info for us. | 2594 | * orphan info for us. |
2595 | */ | 2595 | */ |
2596 | if (inode->i_nlink) | 2596 | if (inode->i_nlink) |
2597 | ext3_orphan_del(handle, inode); | 2597 | ext3_orphan_del(handle, inode); |
2598 | 2598 | ||
2599 | ext3_journal_stop(handle); | 2599 | ext3_journal_stop(handle); |
2600 | return; | 2600 | return; |
2601 | out_notrans: | 2601 | out_notrans: |
2602 | /* | 2602 | /* |
2603 | * Delete the inode from orphan list so that it doesn't stay there | 2603 | * Delete the inode from orphan list so that it doesn't stay there |
2604 | * forever and trigger assertion on umount. | 2604 | * forever and trigger assertion on umount. |
2605 | */ | 2605 | */ |
2606 | if (inode->i_nlink) | 2606 | if (inode->i_nlink) |
2607 | ext3_orphan_del(NULL, inode); | 2607 | ext3_orphan_del(NULL, inode); |
2608 | } | 2608 | } |
2609 | 2609 | ||
2610 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, | 2610 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, |
2611 | unsigned long ino, struct ext3_iloc *iloc) | 2611 | unsigned long ino, struct ext3_iloc *iloc) |
2612 | { | 2612 | { |
2613 | unsigned long block_group; | 2613 | unsigned long block_group; |
2614 | unsigned long offset; | 2614 | unsigned long offset; |
2615 | ext3_fsblk_t block; | 2615 | ext3_fsblk_t block; |
2616 | struct ext3_group_desc *gdp; | 2616 | struct ext3_group_desc *gdp; |
2617 | 2617 | ||
2618 | if (!ext3_valid_inum(sb, ino)) { | 2618 | if (!ext3_valid_inum(sb, ino)) { |
2619 | /* | 2619 | /* |
2620 | * This error is already checked for in namei.c unless we are | 2620 | * This error is already checked for in namei.c unless we are |
2621 | * looking at an NFS filehandle, in which case no error | 2621 | * looking at an NFS filehandle, in which case no error |
2622 | * report is needed | 2622 | * report is needed |
2623 | */ | 2623 | */ |
2624 | return 0; | 2624 | return 0; |
2625 | } | 2625 | } |
2626 | 2626 | ||
2627 | block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); | 2627 | block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); |
2628 | gdp = ext3_get_group_desc(sb, block_group, NULL); | 2628 | gdp = ext3_get_group_desc(sb, block_group, NULL); |
2629 | if (!gdp) | 2629 | if (!gdp) |
2630 | return 0; | 2630 | return 0; |
2631 | /* | 2631 | /* |
2632 | * Figure out the offset within the block group inode table | 2632 | * Figure out the offset within the block group inode table |
2633 | */ | 2633 | */ |
2634 | offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * | 2634 | offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * |
2635 | EXT3_INODE_SIZE(sb); | 2635 | EXT3_INODE_SIZE(sb); |
2636 | block = le32_to_cpu(gdp->bg_inode_table) + | 2636 | block = le32_to_cpu(gdp->bg_inode_table) + |
2637 | (offset >> EXT3_BLOCK_SIZE_BITS(sb)); | 2637 | (offset >> EXT3_BLOCK_SIZE_BITS(sb)); |
2638 | 2638 | ||
2639 | iloc->block_group = block_group; | 2639 | iloc->block_group = block_group; |
2640 | iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); | 2640 | iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); |
2641 | return block; | 2641 | return block; |
2642 | } | 2642 | } |
2643 | 2643 | ||
2644 | /* | 2644 | /* |
2645 | * ext3_get_inode_loc returns with an extra refcount against the inode's | 2645 | * ext3_get_inode_loc returns with an extra refcount against the inode's |
2646 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 2646 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
2647 | * data in memory that is needed to recreate the on-disk version of this | 2647 | * data in memory that is needed to recreate the on-disk version of this |
2648 | * inode. | 2648 | * inode. |
2649 | */ | 2649 | */ |
static int __ext3_get_inode_loc(struct inode *inode,
				struct ext3_iloc *iloc, int in_mem)
{
	ext3_fsblk_t block;
	struct buffer_head *bh;

	/* Translate the inode number into the block that holds its on-disk
	 * copy; 0 means the inode number or group descriptor was invalid. */
	block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
	if (!block)
		return -EIO;

	bh = sb_getblk(inode->i_sb, block);
	if (!bh) {
		ext3_error (inode->i_sb, "ext3_get_inode_loc",
				"unable to read inode block - "
				"inode=%lu, block="E3FSBLK,
				inode->i_ino, block);
		return -EIO;
	}
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);

		/*
		 * If the buffer has the write error flag, we have failed
		 * to write out another inode in the same block. In this
		 * case, we don't have to read the block because we may
		 * read the old inode data successfully.
		 */
		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
			set_buffer_uptodate(bh);

		if (buffer_uptodate(bh)) {
			/* someone brought it uptodate while we waited */
			unlock_buffer(bh);
			goto has_buffer;
		}

		/*
		 * If we have all information of the inode in memory and this
		 * is the only valid inode in the block, we need not read the
		 * block.
		 */
		if (in_mem) {
			struct buffer_head *bitmap_bh;
			struct ext3_group_desc *desc;
			int inodes_per_buffer;
			int inode_offset, i;
			int block_group;
			int start;

			block_group = (inode->i_ino - 1) /
					EXT3_INODES_PER_GROUP(inode->i_sb);
			inodes_per_buffer = bh->b_size /
				EXT3_INODE_SIZE(inode->i_sb);
			inode_offset = ((inode->i_ino - 1) %
					EXT3_INODES_PER_GROUP(inode->i_sb));
			/* Index of the first inode sharing this buffer. */
			start = inode_offset & ~(inodes_per_buffer - 1);

			/* Is the inode bitmap in cache? */
			desc = ext3_get_group_desc(inode->i_sb,
						block_group, NULL);
			if (!desc)
				goto make_io;

			bitmap_bh = sb_getblk(inode->i_sb,
					le32_to_cpu(desc->bg_inode_bitmap));
			if (!bitmap_bh)
				goto make_io;

			/*
			 * If the inode bitmap isn't in cache then the
			 * optimisation may end up performing two reads instead
			 * of one, so skip it.
			 */
			if (!buffer_uptodate(bitmap_bh)) {
				brelse(bitmap_bh);
				goto make_io;
			}
			/* Scan the other inodes sharing this buffer: if any
			 * of them is allocated, we must read the block after
			 * all so we do not clobber their on-disk data. */
			for (i = start; i < start + inodes_per_buffer; i++) {
				if (i == inode_offset)
					continue;
				if (ext3_test_bit(i, bitmap_bh->b_data))
					break;
			}
			brelse(bitmap_bh);
			if (i == start + inodes_per_buffer) {
				/* all other inodes are free, so skip I/O */
				memset(bh->b_data, 0, bh->b_size);
				set_buffer_uptodate(bh);
				unlock_buffer(bh);
				goto has_buffer;
			}
		}

make_io:
		/*
		 * There are other valid inodes in the buffer, this inode
		 * has in-inode xattrs, or we don't have this inode in memory.
		 * Read the block from disk.
		 */
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ_META, bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			ext3_error(inode->i_sb, "ext3_get_inode_loc",
					"unable to read inode block - "
					"inode=%lu, block="E3FSBLK,
					inode->i_ino, block);
			brelse(bh);
			return -EIO;
		}
	}
has_buffer:
	iloc->bh = bh;
	return 0;
}
2766 | 2766 | ||
/*
 * Public wrapper around __ext3_get_inode_loc: every inode field except
 * possible in-inode xattrs is already in the in-core inode, so the disk
 * read may be skipped unless EXT3_STATE_XATTR is set.
 */
int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
{
	/* We have all inode data except xattrs in memory here. */
	return __ext3_get_inode_loc(inode, iloc,
		!ext3_test_inode_state(inode, EXT3_STATE_XATTR));
}
2773 | 2773 | ||
2774 | void ext3_set_inode_flags(struct inode *inode) | 2774 | void ext3_set_inode_flags(struct inode *inode) |
2775 | { | 2775 | { |
2776 | unsigned int flags = EXT3_I(inode)->i_flags; | 2776 | unsigned int flags = EXT3_I(inode)->i_flags; |
2777 | 2777 | ||
2778 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | 2778 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); |
2779 | if (flags & EXT3_SYNC_FL) | 2779 | if (flags & EXT3_SYNC_FL) |
2780 | inode->i_flags |= S_SYNC; | 2780 | inode->i_flags |= S_SYNC; |
2781 | if (flags & EXT3_APPEND_FL) | 2781 | if (flags & EXT3_APPEND_FL) |
2782 | inode->i_flags |= S_APPEND; | 2782 | inode->i_flags |= S_APPEND; |
2783 | if (flags & EXT3_IMMUTABLE_FL) | 2783 | if (flags & EXT3_IMMUTABLE_FL) |
2784 | inode->i_flags |= S_IMMUTABLE; | 2784 | inode->i_flags |= S_IMMUTABLE; |
2785 | if (flags & EXT3_NOATIME_FL) | 2785 | if (flags & EXT3_NOATIME_FL) |
2786 | inode->i_flags |= S_NOATIME; | 2786 | inode->i_flags |= S_NOATIME; |
2787 | if (flags & EXT3_DIRSYNC_FL) | 2787 | if (flags & EXT3_DIRSYNC_FL) |
2788 | inode->i_flags |= S_DIRSYNC; | 2788 | inode->i_flags |= S_DIRSYNC; |
2789 | } | 2789 | } |
2790 | 2790 | ||
2791 | /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ | 2791 | /* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ |
2792 | void ext3_get_inode_flags(struct ext3_inode_info *ei) | 2792 | void ext3_get_inode_flags(struct ext3_inode_info *ei) |
2793 | { | 2793 | { |
2794 | unsigned int flags = ei->vfs_inode.i_flags; | 2794 | unsigned int flags = ei->vfs_inode.i_flags; |
2795 | 2795 | ||
2796 | ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| | 2796 | ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| |
2797 | EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); | 2797 | EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); |
2798 | if (flags & S_SYNC) | 2798 | if (flags & S_SYNC) |
2799 | ei->i_flags |= EXT3_SYNC_FL; | 2799 | ei->i_flags |= EXT3_SYNC_FL; |
2800 | if (flags & S_APPEND) | 2800 | if (flags & S_APPEND) |
2801 | ei->i_flags |= EXT3_APPEND_FL; | 2801 | ei->i_flags |= EXT3_APPEND_FL; |
2802 | if (flags & S_IMMUTABLE) | 2802 | if (flags & S_IMMUTABLE) |
2803 | ei->i_flags |= EXT3_IMMUTABLE_FL; | 2803 | ei->i_flags |= EXT3_IMMUTABLE_FL; |
2804 | if (flags & S_NOATIME) | 2804 | if (flags & S_NOATIME) |
2805 | ei->i_flags |= EXT3_NOATIME_FL; | 2805 | ei->i_flags |= EXT3_NOATIME_FL; |
2806 | if (flags & S_DIRSYNC) | 2806 | if (flags & S_DIRSYNC) |
2807 | ei->i_flags |= EXT3_DIRSYNC_FL; | 2807 | ei->i_flags |= EXT3_DIRSYNC_FL; |
2808 | } | 2808 | } |
2809 | 2809 | ||
/*
 * ext3_iget - return the in-core inode for @ino, reading and decoding it
 * from disk if it is not already cached.
 *
 * Returns the inode on success, or an ERR_PTR() on failure: -ENOMEM if
 * the inode cannot be allocated, -EIO on a read/consistency error, or
 * -ESTALE for a deleted inode (e.g. one reached via a stale NFS handle).
 */
struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
{
	struct ext3_iloc iloc;
	struct ext3_inode *raw_inode;
	struct ext3_inode_info *ei;
	struct buffer_head *bh;
	struct inode *inode;
	journal_t *journal = EXT3_SB(sb)->s_journal;
	transaction_t *transaction;
	long ret;
	int block;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		/* Already in the inode cache and fully initialised. */
		return inode;

	ei = EXT3_I(inode);
	ei->i_block_alloc_info = NULL;

	ret = __ext3_get_inode_loc(inode, &iloc, 0);
	if (ret < 0)
		goto bad_inode;
	bh = iloc.bh;
	raw_inode = ext3_raw_inode(&iloc);
	/* Decode the little-endian on-disk inode into the in-core inode. */
	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	if(!(test_opt (inode->i_sb, NO_UID32))) {
		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	inode->i_size = le32_to_cpu(raw_inode->i_size);
	inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
	inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;

	ei->i_state_flags = 0;
	ei->i_dir_start_lookup = 0;
	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes
	 * the test is that same one that e2fsck uses
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0) {
		if (inode->i_mode == 0 ||
		    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
			/* this inode is deleted */
			brelse (bh);
			ret = -ESTALE;
			goto bad_inode;
		}
		/* The only unlinked inodes we let through here have
		 * valid i_mode and are being read by the orphan
		 * recovery code: that's fine, we're about to complete
		 * the process of deleting those. */
	}
	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
#ifdef EXT3_FRAGMENTS
	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
	ei->i_frag_no = raw_inode->i_frag;
	ei->i_frag_size = raw_inode->i_fsize;
#endif
	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	if (!S_ISREG(inode->i_mode)) {
		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
	} else {
		/* For regular files the on-disk i_size_high field holds
		 * the upper 32 bits of the file size. */
		inode->i_size |=
			((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
	}
	ei->i_disksize = inode->i_size;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	ei->i_block_group = iloc.block_group;
	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (block = 0; block < EXT3_N_BLOCKS; block++)
		ei->i_data[block] = raw_inode->i_block[block];
	INIT_LIST_HEAD(&ei->i_orphan);

	/*
	 * Set transaction id's of transactions that have to be committed
	 * to finish f[data]sync. We set them to currently running transaction
	 * as we cannot be sure that the inode or some of its metadata isn't
	 * part of the transaction - the inode could have been reclaimed and
	 * now it is reread from disk.
	 */
	if (journal) {
		tid_t tid;

		spin_lock(&journal->j_state_lock);
		if (journal->j_running_transaction)
			transaction = journal->j_running_transaction;
		else
			transaction = journal->j_committing_transaction;
		if (transaction)
			tid = transaction->t_tid;
		else
			tid = journal->j_commit_sequence;
		spin_unlock(&journal->j_state_lock);
		atomic_set(&ei->i_sync_tid, tid);
		atomic_set(&ei->i_datasync_tid, tid);
	}

	if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 &&
	    EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
		/*
		 * When mke2fs creates big inodes it does not zero out
		 * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE,
		 * so ignore those first few inodes.
		 */
		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
		if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
		    EXT3_INODE_SIZE(inode->i_sb)) {
			/* On-disk extra size overruns the inode slot. */
			brelse (bh);
			ret = -EIO;
			goto bad_inode;
		}
		if (ei->i_extra_isize == 0) {
			/* The extra space is currently unused. Use it. */
			ei->i_extra_isize = sizeof(struct ext3_inode) -
					    EXT3_GOOD_OLD_INODE_SIZE;
		} else {
			/* A magic number right after the extra fields marks
			 * the start of an in-inode xattr block. */
			__le32 *magic = (void *)raw_inode +
					EXT3_GOOD_OLD_INODE_SIZE +
					ei->i_extra_isize;
			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
				 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
		}
	} else
		ei->i_extra_isize = 0;

	/* Hook up the operations matching the inode's file type. */
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext3_file_inode_operations;
		inode->i_fop = &ext3_file_operations;
		ext3_set_aops(inode);
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext3_dir_inode_operations;
		inode->i_fop = &ext3_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (ext3_inode_is_fast_symlink(inode)) {
			/* Fast symlink: target stored inside i_data. */
			inode->i_op = &ext3_fast_symlink_inode_operations;
			nd_terminate_link(ei->i_data, inode->i_size,
				sizeof(ei->i_data) - 1);
		} else {
			inode->i_op = &ext3_symlink_inode_operations;
			ext3_set_aops(inode);
		}
	} else {
		/* Device node, FIFO or socket: decode the dev_t, which may
		 * be stored in old (block[0]) or new (block[1]) encoding. */
		inode->i_op = &ext3_special_inode_operations;
		if (raw_inode->i_block[0])
			init_special_inode(inode, inode->i_mode,
			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
		else
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
	}
	brelse (iloc.bh);
	ext3_set_inode_flags(inode);
	unlock_new_inode(inode);
	return inode;

bad_inode:
	iget_failed(inode);
	return ERR_PTR(ret);
}
2982 | 2982 | ||
2983 | /* | 2983 | /* |
2984 | * Post the struct inode info into an on-disk inode location in the | 2984 | * Post the struct inode info into an on-disk inode location in the |
2985 | * buffer-cache. This gobbles the caller's reference to the | 2985 | * buffer-cache. This gobbles the caller's reference to the |
2986 | * buffer_head in the inode location struct. | 2986 | * buffer_head in the inode location struct. |
2987 | * | 2987 | * |
2988 | * The caller must have write access to iloc->bh. | 2988 | * The caller must have write access to iloc->bh. |
2989 | */ | 2989 | */ |
2990 | static int ext3_do_update_inode(handle_t *handle, | 2990 | static int ext3_do_update_inode(handle_t *handle, |
2991 | struct inode *inode, | 2991 | struct inode *inode, |
2992 | struct ext3_iloc *iloc) | 2992 | struct ext3_iloc *iloc) |
2993 | { | 2993 | { |
2994 | struct ext3_inode *raw_inode = ext3_raw_inode(iloc); | 2994 | struct ext3_inode *raw_inode = ext3_raw_inode(iloc); |
2995 | struct ext3_inode_info *ei = EXT3_I(inode); | 2995 | struct ext3_inode_info *ei = EXT3_I(inode); |
2996 | struct buffer_head *bh = iloc->bh; | 2996 | struct buffer_head *bh = iloc->bh; |
2997 | int err = 0, rc, block; | 2997 | int err = 0, rc, block; |
2998 | 2998 | ||
2999 | again: | 2999 | again: |
3000 | /* we can't allow multiple procs in here at once, its a bit racey */ | 3000 | /* we can't allow multiple procs in here at once, its a bit racey */ |
3001 | lock_buffer(bh); | 3001 | lock_buffer(bh); |
3002 | 3002 | ||
3003 | /* For fields not not tracking in the in-memory inode, | 3003 | /* For fields not not tracking in the in-memory inode, |
3004 | * initialise them to zero for new inodes. */ | 3004 | * initialise them to zero for new inodes. */ |
3005 | if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) | 3005 | if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) |
3006 | memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); | 3006 | memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); |
3007 | 3007 | ||
3008 | ext3_get_inode_flags(ei); | 3008 | ext3_get_inode_flags(ei); |
3009 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 3009 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
3010 | if(!(test_opt(inode->i_sb, NO_UID32))) { | 3010 | if(!(test_opt(inode->i_sb, NO_UID32))) { |
3011 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); | 3011 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); |
3012 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); | 3012 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); |
3013 | /* | 3013 | /* |
3014 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 3014 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
3015 | * re-used with the upper 16 bits of the uid/gid intact | 3015 | * re-used with the upper 16 bits of the uid/gid intact |
3016 | */ | 3016 | */ |
3017 | if(!ei->i_dtime) { | 3017 | if(!ei->i_dtime) { |
3018 | raw_inode->i_uid_high = | 3018 | raw_inode->i_uid_high = |
3019 | cpu_to_le16(high_16_bits(inode->i_uid)); | 3019 | cpu_to_le16(high_16_bits(inode->i_uid)); |
3020 | raw_inode->i_gid_high = | 3020 | raw_inode->i_gid_high = |
3021 | cpu_to_le16(high_16_bits(inode->i_gid)); | 3021 | cpu_to_le16(high_16_bits(inode->i_gid)); |
3022 | } else { | 3022 | } else { |
3023 | raw_inode->i_uid_high = 0; | 3023 | raw_inode->i_uid_high = 0; |
3024 | raw_inode->i_gid_high = 0; | 3024 | raw_inode->i_gid_high = 0; |
3025 | } | 3025 | } |
3026 | } else { | 3026 | } else { |
3027 | raw_inode->i_uid_low = | 3027 | raw_inode->i_uid_low = |
3028 | cpu_to_le16(fs_high2lowuid(inode->i_uid)); | 3028 | cpu_to_le16(fs_high2lowuid(inode->i_uid)); |
3029 | raw_inode->i_gid_low = | 3029 | raw_inode->i_gid_low = |
3030 | cpu_to_le16(fs_high2lowgid(inode->i_gid)); | 3030 | cpu_to_le16(fs_high2lowgid(inode->i_gid)); |
3031 | raw_inode->i_uid_high = 0; | 3031 | raw_inode->i_uid_high = 0; |
3032 | raw_inode->i_gid_high = 0; | 3032 | raw_inode->i_gid_high = 0; |
3033 | } | 3033 | } |
3034 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | 3034 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); |
3035 | raw_inode->i_size = cpu_to_le32(ei->i_disksize); | 3035 | raw_inode->i_size = cpu_to_le32(ei->i_disksize); |
3036 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | 3036 | raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); |
3037 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | 3037 | raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); |
3038 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | 3038 | raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); |
3039 | raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); | 3039 | raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); |
3040 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 3040 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
3041 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | 3041 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
3042 | #ifdef EXT3_FRAGMENTS | 3042 | #ifdef EXT3_FRAGMENTS |
3043 | raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); | 3043 | raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); |
3044 | raw_inode->i_frag = ei->i_frag_no; | 3044 | raw_inode->i_frag = ei->i_frag_no; |
3045 | raw_inode->i_fsize = ei->i_frag_size; | 3045 | raw_inode->i_fsize = ei->i_frag_size; |
3046 | #endif | 3046 | #endif |
3047 | raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); | 3047 | raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); |
3048 | if (!S_ISREG(inode->i_mode)) { | 3048 | if (!S_ISREG(inode->i_mode)) { |
3049 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); | 3049 | raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); |
3050 | } else { | 3050 | } else { |
3051 | raw_inode->i_size_high = | 3051 | raw_inode->i_size_high = |
3052 | cpu_to_le32(ei->i_disksize >> 32); | 3052 | cpu_to_le32(ei->i_disksize >> 32); |
3053 | if (ei->i_disksize > 0x7fffffffULL) { | 3053 | if (ei->i_disksize > 0x7fffffffULL) { |
3054 | struct super_block *sb = inode->i_sb; | 3054 | struct super_block *sb = inode->i_sb; |
3055 | if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, | 3055 | if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, |
3056 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || | 3056 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || |
3057 | EXT3_SB(sb)->s_es->s_rev_level == | 3057 | EXT3_SB(sb)->s_es->s_rev_level == |
3058 | cpu_to_le32(EXT3_GOOD_OLD_REV)) { | 3058 | cpu_to_le32(EXT3_GOOD_OLD_REV)) { |
3059 | /* If this is the first large file | 3059 | /* If this is the first large file |
3060 | * created, add a flag to the superblock. | 3060 | * created, add a flag to the superblock. |
3061 | */ | 3061 | */ |
3062 | unlock_buffer(bh); | 3062 | unlock_buffer(bh); |
3063 | err = ext3_journal_get_write_access(handle, | 3063 | err = ext3_journal_get_write_access(handle, |
3064 | EXT3_SB(sb)->s_sbh); | 3064 | EXT3_SB(sb)->s_sbh); |
3065 | if (err) | 3065 | if (err) |
3066 | goto out_brelse; | 3066 | goto out_brelse; |
3067 | 3067 | ||
3068 | ext3_update_dynamic_rev(sb); | 3068 | ext3_update_dynamic_rev(sb); |
3069 | EXT3_SET_RO_COMPAT_FEATURE(sb, | 3069 | EXT3_SET_RO_COMPAT_FEATURE(sb, |
3070 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); | 3070 | EXT3_FEATURE_RO_COMPAT_LARGE_FILE); |
3071 | handle->h_sync = 1; | 3071 | handle->h_sync = 1; |
3072 | err = ext3_journal_dirty_metadata(handle, | 3072 | err = ext3_journal_dirty_metadata(handle, |
3073 | EXT3_SB(sb)->s_sbh); | 3073 | EXT3_SB(sb)->s_sbh); |
3074 | /* get our lock and start over */ | 3074 | /* get our lock and start over */ |
3075 | goto again; | 3075 | goto again; |
3076 | } | 3076 | } |
3077 | } | 3077 | } |
3078 | } | 3078 | } |
3079 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 3079 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
3080 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 3080 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
3081 | if (old_valid_dev(inode->i_rdev)) { | 3081 | if (old_valid_dev(inode->i_rdev)) { |
3082 | raw_inode->i_block[0] = | 3082 | raw_inode->i_block[0] = |
3083 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | 3083 | cpu_to_le32(old_encode_dev(inode->i_rdev)); |
3084 | raw_inode->i_block[1] = 0; | 3084 | raw_inode->i_block[1] = 0; |
3085 | } else { | 3085 | } else { |
3086 | raw_inode->i_block[0] = 0; | 3086 | raw_inode->i_block[0] = 0; |
3087 | raw_inode->i_block[1] = | 3087 | raw_inode->i_block[1] = |
3088 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | 3088 | cpu_to_le32(new_encode_dev(inode->i_rdev)); |
3089 | raw_inode->i_block[2] = 0; | 3089 | raw_inode->i_block[2] = 0; |
3090 | } | 3090 | } |
3091 | } else for (block = 0; block < EXT3_N_BLOCKS; block++) | 3091 | } else for (block = 0; block < EXT3_N_BLOCKS; block++) |
3092 | raw_inode->i_block[block] = ei->i_data[block]; | 3092 | raw_inode->i_block[block] = ei->i_data[block]; |
3093 | 3093 | ||
3094 | if (ei->i_extra_isize) | 3094 | if (ei->i_extra_isize) |
3095 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 3095 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
3096 | 3096 | ||
3097 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); | 3097 | BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); |
3098 | unlock_buffer(bh); | 3098 | unlock_buffer(bh); |
3099 | rc = ext3_journal_dirty_metadata(handle, bh); | 3099 | rc = ext3_journal_dirty_metadata(handle, bh); |
3100 | if (!err) | 3100 | if (!err) |
3101 | err = rc; | 3101 | err = rc; |
3102 | ext3_clear_inode_state(inode, EXT3_STATE_NEW); | 3102 | ext3_clear_inode_state(inode, EXT3_STATE_NEW); |
3103 | 3103 | ||
3104 | atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); | 3104 | atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); |
3105 | out_brelse: | 3105 | out_brelse: |
3106 | brelse (bh); | 3106 | brelse (bh); |
3107 | ext3_std_error(inode->i_sb, err); | 3107 | ext3_std_error(inode->i_sb, err); |
3108 | return err; | 3108 | return err; |
3109 | } | 3109 | } |
3110 | 3110 | ||
3111 | /* | 3111 | /* |
3112 | * ext3_write_inode() | 3112 | * ext3_write_inode() |
3113 | * | 3113 | * |
3114 | * We are called from a few places: | 3114 | * We are called from a few places: |
3115 | * | 3115 | * |
3116 | * - Within generic_file_write() for O_SYNC files. | 3116 | * - Within generic_file_write() for O_SYNC files. |
3117 | * Here, there will be no transaction running. We wait for any running | 3117 | * Here, there will be no transaction running. We wait for any running |
3118 | * transaction to commit. | 3118 | * transaction to commit. |
3119 | * | 3119 | * |
3120 | * - Within sys_sync(), kupdate and such. | 3120 | * - Within sys_sync(), kupdate and such. |
3121 | * We wait on commit, if told to. | 3121 | * We wait on commit, if told to. |
3122 | * | 3122 | * |
3123 | * - Within prune_icache() (PF_MEMALLOC == true) | 3123 | * - Within prune_icache() (PF_MEMALLOC == true) |
3124 | * Here we simply return. We can't afford to block kswapd on the | 3124 | * Here we simply return. We can't afford to block kswapd on the |
3125 | * journal commit. | 3125 | * journal commit. |
3126 | * | 3126 | * |
3127 | * In all cases it is actually safe for us to return without doing anything, | 3127 | * In all cases it is actually safe for us to return without doing anything, |
3128 | * because the inode has been copied into a raw inode buffer in | 3128 | * because the inode has been copied into a raw inode buffer in |
3129 | * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for | 3129 | * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for |
3130 | * knfsd. | 3130 | * knfsd. |
3131 | * | 3131 | * |
3132 | * Note that we are absolutely dependent upon all inode dirtiers doing the | 3132 | * Note that we are absolutely dependent upon all inode dirtiers doing the |
3133 | * right thing: they *must* call mark_inode_dirty() after dirtying info in | 3133 | * right thing: they *must* call mark_inode_dirty() after dirtying info in |
3134 | * which we are interested. | 3134 | * which we are interested. |
3135 | * | 3135 | * |
3136 | * It would be a bug for them to not do this. The code: | 3136 | * It would be a bug for them to not do this. The code: |
3137 | * | 3137 | * |
3138 | * mark_inode_dirty(inode) | 3138 | * mark_inode_dirty(inode) |
3139 | * stuff(); | 3139 | * stuff(); |
3140 | * inode->i_size = expr; | 3140 | * inode->i_size = expr; |
3141 | * | 3141 | * |
3142 | * is in error because a kswapd-driven write_inode() could occur while | 3142 | * is in error because a kswapd-driven write_inode() could occur while |
3143 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 3143 | * `stuff()' is running, and the new i_size will be lost. Plus the inode |
3144 | * will no longer be on the superblock's dirty inode list. | 3144 | * will no longer be on the superblock's dirty inode list. |
3145 | */ | 3145 | */ |
3146 | int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) | 3146 | int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) |
3147 | { | 3147 | { |
3148 | if (current->flags & PF_MEMALLOC) | 3148 | if (current->flags & PF_MEMALLOC) |
3149 | return 0; | 3149 | return 0; |
3150 | 3150 | ||
3151 | if (ext3_journal_current_handle()) { | 3151 | if (ext3_journal_current_handle()) { |
3152 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 3152 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
3153 | dump_stack(); | 3153 | dump_stack(); |
3154 | return -EIO; | 3154 | return -EIO; |
3155 | } | 3155 | } |
3156 | 3156 | ||
3157 | if (wbc->sync_mode != WB_SYNC_ALL) | 3157 | if (wbc->sync_mode != WB_SYNC_ALL) |
3158 | return 0; | 3158 | return 0; |
3159 | 3159 | ||
3160 | return ext3_force_commit(inode->i_sb); | 3160 | return ext3_force_commit(inode->i_sb); |
3161 | } | 3161 | } |
3162 | 3162 | ||
3163 | /* | 3163 | /* |
3164 | * ext3_setattr() | 3164 | * ext3_setattr() |
3165 | * | 3165 | * |
3166 | * Called from notify_change. | 3166 | * Called from notify_change. |
3167 | * | 3167 | * |
3168 | * We want to trap VFS attempts to truncate the file as soon as | 3168 | * We want to trap VFS attempts to truncate the file as soon as |
3169 | * possible. In particular, we want to make sure that when the VFS | 3169 | * possible. In particular, we want to make sure that when the VFS |
3170 | * shrinks i_size, we put the inode on the orphan list and modify | 3170 | * shrinks i_size, we put the inode on the orphan list and modify |
3171 | * i_disksize immediately, so that during the subsequent flushing of | 3171 | * i_disksize immediately, so that during the subsequent flushing of |
3172 | * dirty pages and freeing of disk blocks, we can guarantee that any | 3172 | * dirty pages and freeing of disk blocks, we can guarantee that any |
3173 | * commit will leave the blocks being flushed in an unused state on | 3173 | * commit will leave the blocks being flushed in an unused state on |
3174 | * disk. (On recovery, the inode will get truncated and the blocks will | 3174 | * disk. (On recovery, the inode will get truncated and the blocks will |
3175 | * be freed, so we have a strong guarantee that no future commit will | 3175 | * be freed, so we have a strong guarantee that no future commit will |
3176 | * leave these blocks visible to the user.) | 3176 | * leave these blocks visible to the user.) |
3177 | * | 3177 | * |
3178 | * Called with inode->sem down. | 3178 | * Called with inode->sem down. |
3179 | */ | 3179 | */ |
3180 | int ext3_setattr(struct dentry *dentry, struct iattr *attr) | 3180 | int ext3_setattr(struct dentry *dentry, struct iattr *attr) |
3181 | { | 3181 | { |
3182 | struct inode *inode = dentry->d_inode; | 3182 | struct inode *inode = dentry->d_inode; |
3183 | int error, rc = 0; | 3183 | int error, rc = 0; |
3184 | const unsigned int ia_valid = attr->ia_valid; | 3184 | const unsigned int ia_valid = attr->ia_valid; |
3185 | 3185 | ||
3186 | error = inode_change_ok(inode, attr); | 3186 | error = inode_change_ok(inode, attr); |
3187 | if (error) | 3187 | if (error) |
3188 | return error; | 3188 | return error; |
3189 | 3189 | ||
3190 | if (is_quota_modification(inode, attr)) | 3190 | if (is_quota_modification(inode, attr)) |
3191 | dquot_initialize(inode); | 3191 | dquot_initialize(inode); |
3192 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 3192 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
3193 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 3193 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
3194 | handle_t *handle; | 3194 | handle_t *handle; |
3195 | 3195 | ||
3196 | /* (user+group)*(old+new) structure, inode write (sb, | 3196 | /* (user+group)*(old+new) structure, inode write (sb, |
3197 | * inode block, ? - but truncate inode update has it) */ | 3197 | * inode block, ? - but truncate inode update has it) */ |
3198 | handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ | 3198 | handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ |
3199 | EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3); | 3199 | EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3); |
3200 | if (IS_ERR(handle)) { | 3200 | if (IS_ERR(handle)) { |
3201 | error = PTR_ERR(handle); | 3201 | error = PTR_ERR(handle); |
3202 | goto err_out; | 3202 | goto err_out; |
3203 | } | 3203 | } |
3204 | error = dquot_transfer(inode, attr); | 3204 | error = dquot_transfer(inode, attr); |
3205 | if (error) { | 3205 | if (error) { |
3206 | ext3_journal_stop(handle); | 3206 | ext3_journal_stop(handle); |
3207 | return error; | 3207 | return error; |
3208 | } | 3208 | } |
3209 | /* Update corresponding info in inode so that everything is in | 3209 | /* Update corresponding info in inode so that everything is in |
3210 | * one transaction */ | 3210 | * one transaction */ |
3211 | if (attr->ia_valid & ATTR_UID) | 3211 | if (attr->ia_valid & ATTR_UID) |
3212 | inode->i_uid = attr->ia_uid; | 3212 | inode->i_uid = attr->ia_uid; |
3213 | if (attr->ia_valid & ATTR_GID) | 3213 | if (attr->ia_valid & ATTR_GID) |
3214 | inode->i_gid = attr->ia_gid; | 3214 | inode->i_gid = attr->ia_gid; |
3215 | error = ext3_mark_inode_dirty(handle, inode); | 3215 | error = ext3_mark_inode_dirty(handle, inode); |
3216 | ext3_journal_stop(handle); | 3216 | ext3_journal_stop(handle); |
3217 | } | 3217 | } |
3218 | 3218 | ||
3219 | if (attr->ia_valid & ATTR_SIZE) | ||
3220 | inode_dio_wait(inode); | ||
3221 | |||
3219 | if (S_ISREG(inode->i_mode) && | 3222 | if (S_ISREG(inode->i_mode) && |
3220 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 3223 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { |
3221 | handle_t *handle; | 3224 | handle_t *handle; |
3222 | 3225 | ||
3223 | handle = ext3_journal_start(inode, 3); | 3226 | handle = ext3_journal_start(inode, 3); |
3224 | if (IS_ERR(handle)) { | 3227 | if (IS_ERR(handle)) { |
3225 | error = PTR_ERR(handle); | 3228 | error = PTR_ERR(handle); |
3226 | goto err_out; | 3229 | goto err_out; |
3227 | } | 3230 | } |
3228 | 3231 | ||
3229 | error = ext3_orphan_add(handle, inode); | 3232 | error = ext3_orphan_add(handle, inode); |
3230 | EXT3_I(inode)->i_disksize = attr->ia_size; | 3233 | EXT3_I(inode)->i_disksize = attr->ia_size; |
3231 | rc = ext3_mark_inode_dirty(handle, inode); | 3234 | rc = ext3_mark_inode_dirty(handle, inode); |
3232 | if (!error) | 3235 | if (!error) |
3233 | error = rc; | 3236 | error = rc; |
3234 | ext3_journal_stop(handle); | 3237 | ext3_journal_stop(handle); |
3235 | } | 3238 | } |
3236 | 3239 | ||
3237 | if ((attr->ia_valid & ATTR_SIZE) && | 3240 | if ((attr->ia_valid & ATTR_SIZE) && |
3238 | attr->ia_size != i_size_read(inode)) { | 3241 | attr->ia_size != i_size_read(inode)) { |
3239 | rc = vmtruncate(inode, attr->ia_size); | 3242 | rc = vmtruncate(inode, attr->ia_size); |
3240 | if (rc) | 3243 | if (rc) |
3241 | goto err_out; | 3244 | goto err_out; |
3242 | } | 3245 | } |
3243 | 3246 | ||
3244 | setattr_copy(inode, attr); | 3247 | setattr_copy(inode, attr); |
3245 | mark_inode_dirty(inode); | 3248 | mark_inode_dirty(inode); |
3246 | 3249 | ||
3247 | if (ia_valid & ATTR_MODE) | 3250 | if (ia_valid & ATTR_MODE) |
3248 | rc = ext3_acl_chmod(inode); | 3251 | rc = ext3_acl_chmod(inode); |
3249 | 3252 | ||
3250 | err_out: | 3253 | err_out: |
3251 | ext3_std_error(inode->i_sb, error); | 3254 | ext3_std_error(inode->i_sb, error); |
3252 | if (!error) | 3255 | if (!error) |
3253 | error = rc; | 3256 | error = rc; |
3254 | return error; | 3257 | return error; |
3255 | } | 3258 | } |
3256 | 3259 | ||
3257 | 3260 | ||
3258 | /* | 3261 | /* |
3259 | * How many blocks doth make a writepage()? | 3262 | * How many blocks doth make a writepage()? |
3260 | * | 3263 | * |
3261 | * With N blocks per page, it may be: | 3264 | * With N blocks per page, it may be: |
3262 | * N data blocks | 3265 | * N data blocks |
3263 | * 2 indirect block | 3266 | * 2 indirect block |
3264 | * 2 dindirect | 3267 | * 2 dindirect |
3265 | * 1 tindirect | 3268 | * 1 tindirect |
3266 | * N+5 bitmap blocks (from the above) | 3269 | * N+5 bitmap blocks (from the above) |
3267 | * N+5 group descriptor summary blocks | 3270 | * N+5 group descriptor summary blocks |
3268 | * 1 inode block | 3271 | * 1 inode block |
3269 | * 1 superblock. | 3272 | * 1 superblock. |
3270 | * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files | 3273 | * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files |
3271 | * | 3274 | * |
3272 | * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS | 3275 | * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS |
3273 | * | 3276 | * |
3274 | * With ordered or writeback data it's the same, less the N data blocks. | 3277 | * With ordered or writeback data it's the same, less the N data blocks. |
3275 | * | 3278 | * |
3276 | * If the inode's direct blocks can hold an integral number of pages then a | 3279 | * If the inode's direct blocks can hold an integral number of pages then a |
3277 | * page cannot straddle two indirect blocks, and we can only touch one indirect | 3280 | * page cannot straddle two indirect blocks, and we can only touch one indirect |
3278 | * and dindirect block, and the "5" above becomes "3". | 3281 | * and dindirect block, and the "5" above becomes "3". |
3279 | * | 3282 | * |
3280 | * This still overestimates under most circumstances. If we were to pass the | 3283 | * This still overestimates under most circumstances. If we were to pass the |
3281 | * start and end offsets in here as well we could do block_to_path() on each | 3284 | * start and end offsets in here as well we could do block_to_path() on each |
3282 | * block and work out the exact number of indirects which are touched. Pah. | 3285 | * block and work out the exact number of indirects which are touched. Pah. |
3283 | */ | 3286 | */ |
3284 | 3287 | ||
3285 | static int ext3_writepage_trans_blocks(struct inode *inode) | 3288 | static int ext3_writepage_trans_blocks(struct inode *inode) |
3286 | { | 3289 | { |
3287 | int bpp = ext3_journal_blocks_per_page(inode); | 3290 | int bpp = ext3_journal_blocks_per_page(inode); |
3288 | int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; | 3291 | int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; |
3289 | int ret; | 3292 | int ret; |
3290 | 3293 | ||
3291 | if (ext3_should_journal_data(inode)) | 3294 | if (ext3_should_journal_data(inode)) |
3292 | ret = 3 * (bpp + indirects) + 2; | 3295 | ret = 3 * (bpp + indirects) + 2; |
3293 | else | 3296 | else |
3294 | ret = 2 * (bpp + indirects) + indirects + 2; | 3297 | ret = 2 * (bpp + indirects) + indirects + 2; |
3295 | 3298 | ||
3296 | #ifdef CONFIG_QUOTA | 3299 | #ifdef CONFIG_QUOTA |
3297 | /* We know that structure was already allocated during dquot_initialize so | 3300 | /* We know that structure was already allocated during dquot_initialize so |
3298 | * we will be updating only the data blocks + inodes */ | 3301 | * we will be updating only the data blocks + inodes */ |
3299 | ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); | 3302 | ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); |
3300 | #endif | 3303 | #endif |
3301 | 3304 | ||
3302 | return ret; | 3305 | return ret; |
3303 | } | 3306 | } |
3304 | 3307 | ||
3305 | /* | 3308 | /* |
3306 | * The caller must have previously called ext3_reserve_inode_write(). | 3309 | * The caller must have previously called ext3_reserve_inode_write(). |
3307 | * Given this, we know that the caller already has write access to iloc->bh. | 3310 | * Given this, we know that the caller already has write access to iloc->bh. |
3308 | */ | 3311 | */ |
3309 | int ext3_mark_iloc_dirty(handle_t *handle, | 3312 | int ext3_mark_iloc_dirty(handle_t *handle, |
3310 | struct inode *inode, struct ext3_iloc *iloc) | 3313 | struct inode *inode, struct ext3_iloc *iloc) |
3311 | { | 3314 | { |
3312 | int err = 0; | 3315 | int err = 0; |
3313 | 3316 | ||
3314 | /* the do_update_inode consumes one bh->b_count */ | 3317 | /* the do_update_inode consumes one bh->b_count */ |
3315 | get_bh(iloc->bh); | 3318 | get_bh(iloc->bh); |
3316 | 3319 | ||
3317 | /* ext3_do_update_inode() does journal_dirty_metadata */ | 3320 | /* ext3_do_update_inode() does journal_dirty_metadata */ |
3318 | err = ext3_do_update_inode(handle, inode, iloc); | 3321 | err = ext3_do_update_inode(handle, inode, iloc); |
3319 | put_bh(iloc->bh); | 3322 | put_bh(iloc->bh); |
3320 | return err; | 3323 | return err; |
3321 | } | 3324 | } |
3322 | 3325 | ||
3323 | /* | 3326 | /* |
3324 | * On success, We end up with an outstanding reference count against | 3327 | * On success, We end up with an outstanding reference count against |
3325 | * iloc->bh. This _must_ be cleaned up later. | 3328 | * iloc->bh. This _must_ be cleaned up later. |
3326 | */ | 3329 | */ |
3327 | 3330 | ||
3328 | int | 3331 | int |
3329 | ext3_reserve_inode_write(handle_t *handle, struct inode *inode, | 3332 | ext3_reserve_inode_write(handle_t *handle, struct inode *inode, |
3330 | struct ext3_iloc *iloc) | 3333 | struct ext3_iloc *iloc) |
3331 | { | 3334 | { |
3332 | int err = 0; | 3335 | int err = 0; |
3333 | if (handle) { | 3336 | if (handle) { |
3334 | err = ext3_get_inode_loc(inode, iloc); | 3337 | err = ext3_get_inode_loc(inode, iloc); |
3335 | if (!err) { | 3338 | if (!err) { |
3336 | BUFFER_TRACE(iloc->bh, "get_write_access"); | 3339 | BUFFER_TRACE(iloc->bh, "get_write_access"); |
3337 | err = ext3_journal_get_write_access(handle, iloc->bh); | 3340 | err = ext3_journal_get_write_access(handle, iloc->bh); |
3338 | if (err) { | 3341 | if (err) { |
3339 | brelse(iloc->bh); | 3342 | brelse(iloc->bh); |
3340 | iloc->bh = NULL; | 3343 | iloc->bh = NULL; |
3341 | } | 3344 | } |
3342 | } | 3345 | } |
3343 | } | 3346 | } |
3344 | ext3_std_error(inode->i_sb, err); | 3347 | ext3_std_error(inode->i_sb, err); |
3345 | return err; | 3348 | return err; |
3346 | } | 3349 | } |
3347 | 3350 | ||
3348 | /* | 3351 | /* |
3349 | * What we do here is to mark the in-core inode as clean with respect to inode | 3352 | * What we do here is to mark the in-core inode as clean with respect to inode |
3350 | * dirtiness (it may still be data-dirty). | 3353 | * dirtiness (it may still be data-dirty). |
3351 | * This means that the in-core inode may be reaped by prune_icache | 3354 | * This means that the in-core inode may be reaped by prune_icache |
3352 | * without having to perform any I/O. This is a very good thing, | 3355 | * without having to perform any I/O. This is a very good thing, |
3353 | * because *any* task may call prune_icache - even ones which | 3356 | * because *any* task may call prune_icache - even ones which |
3354 | * have a transaction open against a different journal. | 3357 | * have a transaction open against a different journal. |
3355 | * | 3358 | * |
3356 | * Is this cheating? Not really. Sure, we haven't written the | 3359 | * Is this cheating? Not really. Sure, we haven't written the |
3357 | * inode out, but prune_icache isn't a user-visible syncing function. | 3360 | * inode out, but prune_icache isn't a user-visible syncing function. |
3358 | * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) | 3361 | * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) |
3359 | * we start and wait on commits. | 3362 | * we start and wait on commits. |
3360 | * | 3363 | * |
3361 | * Is this efficient/effective? Well, we're being nice to the system | 3364 | * Is this efficient/effective? Well, we're being nice to the system |
3362 | * by cleaning up our inodes proactively so they can be reaped | 3365 | * by cleaning up our inodes proactively so they can be reaped |
3363 | * without I/O. But we are potentially leaving up to five seconds' | 3366 | * without I/O. But we are potentially leaving up to five seconds' |
3364 | * worth of inodes floating about which prune_icache wants us to | 3367 | * worth of inodes floating about which prune_icache wants us to |
3365 | * write out. One way to fix that would be to get prune_icache() | 3368 | * write out. One way to fix that would be to get prune_icache() |
3366 | * to do a write_super() to free up some memory. It has the desired | 3369 | * to do a write_super() to free up some memory. It has the desired |
3367 | * effect. | 3370 | * effect. |
3368 | */ | 3371 | */ |
3369 | int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) | 3372 | int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) |
3370 | { | 3373 | { |
3371 | struct ext3_iloc iloc; | 3374 | struct ext3_iloc iloc; |
3372 | int err; | 3375 | int err; |
3373 | 3376 | ||
3374 | might_sleep(); | 3377 | might_sleep(); |
3375 | err = ext3_reserve_inode_write(handle, inode, &iloc); | 3378 | err = ext3_reserve_inode_write(handle, inode, &iloc); |
3376 | if (!err) | 3379 | if (!err) |
3377 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 3380 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |
3378 | return err; | 3381 | return err; |
3379 | } | 3382 | } |
3380 | 3383 | ||
3381 | /* | 3384 | /* |
3382 | * ext3_dirty_inode() is called from __mark_inode_dirty() | 3385 | * ext3_dirty_inode() is called from __mark_inode_dirty() |
3383 | * | 3386 | * |
3384 | * We're really interested in the case where a file is being extended. | 3387 | * We're really interested in the case where a file is being extended. |
3385 | * i_size has been changed by generic_commit_write() and we thus need | 3388 | * i_size has been changed by generic_commit_write() and we thus need |
3386 | * to include the updated inode in the current transaction. | 3389 | * to include the updated inode in the current transaction. |
3387 | * | 3390 | * |
3388 | * Also, dquot_alloc_space() will always dirty the inode when blocks | 3391 | * Also, dquot_alloc_space() will always dirty the inode when blocks |
3389 | * are allocated to the file. | 3392 | * are allocated to the file. |
3390 | * | 3393 | * |
3391 | * If the inode is marked synchronous, we don't honour that here - doing | 3394 | * If the inode is marked synchronous, we don't honour that here - doing |
3392 | * so would cause a commit on atime updates, which we don't bother doing. | 3395 | * so would cause a commit on atime updates, which we don't bother doing. |
3393 | * We handle synchronous inodes at the highest possible level. | 3396 | * We handle synchronous inodes at the highest possible level. |
3394 | */ | 3397 | */ |
3395 | void ext3_dirty_inode(struct inode *inode, int flags) | 3398 | void ext3_dirty_inode(struct inode *inode, int flags) |
3396 | { | 3399 | { |
3397 | handle_t *current_handle = ext3_journal_current_handle(); | 3400 | handle_t *current_handle = ext3_journal_current_handle(); |
3398 | handle_t *handle; | 3401 | handle_t *handle; |
3399 | 3402 | ||
3400 | handle = ext3_journal_start(inode, 2); | 3403 | handle = ext3_journal_start(inode, 2); |
3401 | if (IS_ERR(handle)) | 3404 | if (IS_ERR(handle)) |
3402 | goto out; | 3405 | goto out; |
3403 | if (current_handle && | 3406 | if (current_handle && |
3404 | current_handle->h_transaction != handle->h_transaction) { | 3407 | current_handle->h_transaction != handle->h_transaction) { |
3405 | /* This task has a transaction open against a different fs */ | 3408 | /* This task has a transaction open against a different fs */ |
3406 | printk(KERN_EMERG "%s: transactions do not match!\n", | 3409 | printk(KERN_EMERG "%s: transactions do not match!\n", |
3407 | __func__); | 3410 | __func__); |
3408 | } else { | 3411 | } else { |
3409 | jbd_debug(5, "marking dirty. outer handle=%p\n", | 3412 | jbd_debug(5, "marking dirty. outer handle=%p\n", |
3410 | current_handle); | 3413 | current_handle); |
3411 | ext3_mark_inode_dirty(handle, inode); | 3414 | ext3_mark_inode_dirty(handle, inode); |
3412 | } | 3415 | } |
3413 | ext3_journal_stop(handle); | 3416 | ext3_journal_stop(handle); |
3414 | out: | 3417 | out: |
3415 | return; | 3418 | return; |
3416 | } | 3419 | } |
3417 | 3420 | ||
3418 | #if 0 | 3421 | #if 0 |
3419 | /* | 3422 | /* |
3420 | * Bind an inode's backing buffer_head into this transaction, to prevent | 3423 | * Bind an inode's backing buffer_head into this transaction, to prevent |
3421 | * it from being flushed to disk early. Unlike | 3424 | * it from being flushed to disk early. Unlike |
3422 | * ext3_reserve_inode_write, this leaves behind no bh reference and | 3425 | * ext3_reserve_inode_write, this leaves behind no bh reference and |
3423 | * returns no iloc structure, so the caller needs to repeat the iloc | 3426 | * returns no iloc structure, so the caller needs to repeat the iloc |
3424 | * lookup to mark the inode dirty later. | 3427 | * lookup to mark the inode dirty later. |
3425 | */ | 3428 | */ |
3426 | static int ext3_pin_inode(handle_t *handle, struct inode *inode) | 3429 | static int ext3_pin_inode(handle_t *handle, struct inode *inode) |
3427 | { | 3430 | { |
3428 | struct ext3_iloc iloc; | 3431 | struct ext3_iloc iloc; |
3429 | 3432 | ||
3430 | int err = 0; | 3433 | int err = 0; |
3431 | if (handle) { | 3434 | if (handle) { |
3432 | err = ext3_get_inode_loc(inode, &iloc); | 3435 | err = ext3_get_inode_loc(inode, &iloc); |
3433 | if (!err) { | 3436 | if (!err) { |
3434 | BUFFER_TRACE(iloc.bh, "get_write_access"); | 3437 | BUFFER_TRACE(iloc.bh, "get_write_access"); |
3435 | err = journal_get_write_access(handle, iloc.bh); | 3438 | err = journal_get_write_access(handle, iloc.bh); |
3436 | if (!err) | 3439 | if (!err) |
3437 | err = ext3_journal_dirty_metadata(handle, | 3440 | err = ext3_journal_dirty_metadata(handle, |
3438 | iloc.bh); | 3441 | iloc.bh); |
3439 | brelse(iloc.bh); | 3442 | brelse(iloc.bh); |
3440 | } | 3443 | } |
3441 | } | 3444 | } |
3442 | ext3_std_error(inode->i_sb, err); | 3445 | ext3_std_error(inode->i_sb, err); |
3443 | return err; | 3446 | return err; |
3444 | } | 3447 | } |
3445 | #endif | 3448 | #endif |
3446 | 3449 | ||
/*
 * Switch the inode between data-journalled and non-journalled mode at
 * runtime by toggling EXT3_JOURNAL_DATA_FL and swapping in the matching
 * address_space operations.
 *
 * @inode: inode whose journalling mode is being changed
 * @val:   non-zero to enable data journalling, zero to disable it
 *
 * Returns 0 on success or a negative errno (-EROFS if the journal has
 * already been aborted, or whatever ext4_mark_inode_dirty reports).
 */
int ext3_change_inode_journal_flag(struct inode *inode, int val)
{
	journal_t *journal;
	handle_t *handle;
	int err;

	/*
	 * We have to be very careful here: changing a data block's
	 * journaling status dynamically is dangerous.  If we write a
	 * data block to the journal, change the status and then delete
	 * that block, we risk forgetting to revoke the old log record
	 * from the journal and so a subsequent replay can corrupt data.
	 * So, first we make sure that the journal is empty and that
	 * nobody is changing anything.
	 */

	journal = EXT3_JOURNAL(inode);
	if (is_journal_aborted(journal))
		return -EROFS;

	/* Block all new transactions, then force everything to disk. */
	journal_lock_updates(journal);
	journal_flush(journal);

	/*
	 * OK, there are no updates running now, and all cached data is
	 * synced to disk.  We are now in a completely consistent state
	 * which doesn't have anything in the journal, and we know that
	 * no filesystem updates are running, so it is safe to modify
	 * the inode's in-core data-journaling state flag now.
	 */

	if (val)
		EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
	else
		EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
	/* Re-select the aops that match the new journalling mode. */
	ext3_set_aops(inode);

	journal_unlock_updates(journal);

	/* Finally we can mark the inode as dirty. */

	handle = ext3_journal_start(inode, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext3_mark_inode_dirty(handle, inode);
	/* h_sync: commit synchronously so the mode change is durable. */
	handle->h_sync = 1;
	ext3_journal_stop(handle);
	ext3_std_error(inode->i_sb, err);

	return err;
}
3499 | 3502 |
fs/ext4/inode.c
1 | /* | 1 | /* |
2 | * linux/fs/ext4/inode.c | 2 | * linux/fs/ext4/inode.c |
3 | * | 3 | * |
4 | * Copyright (C) 1992, 1993, 1994, 1995 | 4 | * Copyright (C) 1992, 1993, 1994, 1995 |
5 | * Remy Card (card@masi.ibp.fr) | 5 | * Remy Card (card@masi.ibp.fr) |
6 | * Laboratoire MASI - Institut Blaise Pascal | 6 | * Laboratoire MASI - Institut Blaise Pascal |
7 | * Universite Pierre et Marie Curie (Paris VI) | 7 | * Universite Pierre et Marie Curie (Paris VI) |
8 | * | 8 | * |
9 | * from | 9 | * from |
10 | * | 10 | * |
11 | * linux/fs/minix/inode.c | 11 | * linux/fs/minix/inode.c |
12 | * | 12 | * |
13 | * Copyright (C) 1991, 1992 Linus Torvalds | 13 | * Copyright (C) 1991, 1992 Linus Torvalds |
14 | * | 14 | * |
15 | * Goal-directed block allocation by Stephen Tweedie | 15 | * Goal-directed block allocation by Stephen Tweedie |
16 | * (sct@redhat.com), 1993, 1998 | 16 | * (sct@redhat.com), 1993, 1998 |
17 | * Big-endian to little-endian byte-swapping/bitmaps by | 17 | * Big-endian to little-endian byte-swapping/bitmaps by |
18 | * David S. Miller (davem@caip.rutgers.edu), 1995 | 18 | * David S. Miller (davem@caip.rutgers.edu), 1995 |
19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek | 19 | * 64-bit file support on 64-bit platforms by Jakub Jelinek |
20 | * (jj@sunsite.ms.mff.cuni.cz) | 20 | * (jj@sunsite.ms.mff.cuni.cz) |
21 | * | 21 | * |
22 | * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 | 22 | * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/time.h> | 27 | #include <linux/time.h> |
28 | #include <linux/jbd2.h> | 28 | #include <linux/jbd2.h> |
29 | #include <linux/highuid.h> | 29 | #include <linux/highuid.h> |
30 | #include <linux/pagemap.h> | 30 | #include <linux/pagemap.h> |
31 | #include <linux/quotaops.h> | 31 | #include <linux/quotaops.h> |
32 | #include <linux/string.h> | 32 | #include <linux/string.h> |
33 | #include <linux/buffer_head.h> | 33 | #include <linux/buffer_head.h> |
34 | #include <linux/writeback.h> | 34 | #include <linux/writeback.h> |
35 | #include <linux/pagevec.h> | 35 | #include <linux/pagevec.h> |
36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
37 | #include <linux/namei.h> | 37 | #include <linux/namei.h> |
38 | #include <linux/uio.h> | 38 | #include <linux/uio.h> |
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/kernel.h> | 41 | #include <linux/kernel.h> |
42 | #include <linux/printk.h> | 42 | #include <linux/printk.h> |
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/ratelimit.h> | 44 | #include <linux/ratelimit.h> |
45 | 45 | ||
46 | #include "ext4_jbd2.h" | 46 | #include "ext4_jbd2.h" |
47 | #include "xattr.h" | 47 | #include "xattr.h" |
48 | #include "acl.h" | 48 | #include "acl.h" |
49 | #include "ext4_extents.h" | 49 | #include "ext4_extents.h" |
50 | 50 | ||
51 | #include <trace/events/ext4.h> | 51 | #include <trace/events/ext4.h> |
52 | 52 | ||
53 | #define MPAGE_DA_EXTENT_TAIL 0x01 | 53 | #define MPAGE_DA_EXTENT_TAIL 0x01 |
54 | 54 | ||
55 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 55 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
56 | loff_t new_size) | 56 | loff_t new_size) |
57 | { | 57 | { |
58 | trace_ext4_begin_ordered_truncate(inode, new_size); | 58 | trace_ext4_begin_ordered_truncate(inode, new_size); |
59 | /* | 59 | /* |
60 | * If jinode is zero, then we never opened the file for | 60 | * If jinode is zero, then we never opened the file for |
61 | * writing, so there's no need to call | 61 | * writing, so there's no need to call |
62 | * jbd2_journal_begin_ordered_truncate() since there's no | 62 | * jbd2_journal_begin_ordered_truncate() since there's no |
63 | * outstanding writes we need to flush. | 63 | * outstanding writes we need to flush. |
64 | */ | 64 | */ |
65 | if (!EXT4_I(inode)->jinode) | 65 | if (!EXT4_I(inode)->jinode) |
66 | return 0; | 66 | return 0; |
67 | return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), | 67 | return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), |
68 | EXT4_I(inode)->jinode, | 68 | EXT4_I(inode)->jinode, |
69 | new_size); | 69 | new_size); |
70 | } | 70 | } |
71 | 71 | ||
72 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 72 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
73 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 73 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
74 | struct buffer_head *bh_result, int create); | 74 | struct buffer_head *bh_result, int create); |
75 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | 75 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); |
76 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | 76 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); |
77 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 77 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
78 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 78 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Test whether an inode is a fast symlink. | 81 | * Test whether an inode is a fast symlink. |
82 | */ | 82 | */ |
83 | static int ext4_inode_is_fast_symlink(struct inode *inode) | 83 | static int ext4_inode_is_fast_symlink(struct inode *inode) |
84 | { | 84 | { |
85 | int ea_blocks = EXT4_I(inode)->i_file_acl ? | 85 | int ea_blocks = EXT4_I(inode)->i_file_acl ? |
86 | (inode->i_sb->s_blocksize >> 9) : 0; | 86 | (inode->i_sb->s_blocksize >> 9) : 0; |
87 | 87 | ||
88 | return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); | 88 | return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); |
89 | } | 89 | } |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Work out how many blocks we need to proceed with the next chunk of a | 92 | * Work out how many blocks we need to proceed with the next chunk of a |
93 | * truncate transaction. | 93 | * truncate transaction. |
94 | */ | 94 | */ |
95 | static unsigned long blocks_for_truncate(struct inode *inode) | 95 | static unsigned long blocks_for_truncate(struct inode *inode) |
96 | { | 96 | { |
97 | ext4_lblk_t needed; | 97 | ext4_lblk_t needed; |
98 | 98 | ||
99 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); | 99 | needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); |
100 | 100 | ||
101 | /* Give ourselves just enough room to cope with inodes in which | 101 | /* Give ourselves just enough room to cope with inodes in which |
102 | * i_blocks is corrupt: we've seen disk corruptions in the past | 102 | * i_blocks is corrupt: we've seen disk corruptions in the past |
103 | * which resulted in random data in an inode which looked enough | 103 | * which resulted in random data in an inode which looked enough |
104 | * like a regular file for ext4 to try to delete it. Things | 104 | * like a regular file for ext4 to try to delete it. Things |
105 | * will go a bit crazy if that happens, but at least we should | 105 | * will go a bit crazy if that happens, but at least we should |
106 | * try not to panic the whole kernel. */ | 106 | * try not to panic the whole kernel. */ |
107 | if (needed < 2) | 107 | if (needed < 2) |
108 | needed = 2; | 108 | needed = 2; |
109 | 109 | ||
110 | /* But we need to bound the transaction so we don't overflow the | 110 | /* But we need to bound the transaction so we don't overflow the |
111 | * journal. */ | 111 | * journal. */ |
112 | if (needed > EXT4_MAX_TRANS_DATA) | 112 | if (needed > EXT4_MAX_TRANS_DATA) |
113 | needed = EXT4_MAX_TRANS_DATA; | 113 | needed = EXT4_MAX_TRANS_DATA; |
114 | 114 | ||
115 | return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; | 115 | return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; |
116 | } | 116 | } |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Truncate transactions can be complex and absolutely huge. So we need to | 119 | * Truncate transactions can be complex and absolutely huge. So we need to |
120 | * be able to restart the transaction at a conventient checkpoint to make | 120 | * be able to restart the transaction at a conventient checkpoint to make |
121 | * sure we don't overflow the journal. | 121 | * sure we don't overflow the journal. |
122 | * | 122 | * |
123 | * start_transaction gets us a new handle for a truncate transaction, | 123 | * start_transaction gets us a new handle for a truncate transaction, |
124 | * and extend_transaction tries to extend the existing one a bit. If | 124 | * and extend_transaction tries to extend the existing one a bit. If |
125 | * extend fails, we need to propagate the failure up and restart the | 125 | * extend fails, we need to propagate the failure up and restart the |
126 | * transaction in the top-level truncate loop. --sct | 126 | * transaction in the top-level truncate loop. --sct |
127 | */ | 127 | */ |
128 | static handle_t *start_transaction(struct inode *inode) | 128 | static handle_t *start_transaction(struct inode *inode) |
129 | { | 129 | { |
130 | handle_t *result; | 130 | handle_t *result; |
131 | 131 | ||
132 | result = ext4_journal_start(inode, blocks_for_truncate(inode)); | 132 | result = ext4_journal_start(inode, blocks_for_truncate(inode)); |
133 | if (!IS_ERR(result)) | 133 | if (!IS_ERR(result)) |
134 | return result; | 134 | return result; |
135 | 135 | ||
136 | ext4_std_error(inode->i_sb, PTR_ERR(result)); | 136 | ext4_std_error(inode->i_sb, PTR_ERR(result)); |
137 | return result; | 137 | return result; |
138 | } | 138 | } |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * Try to extend this transaction for the purposes of truncation. | 141 | * Try to extend this transaction for the purposes of truncation. |
142 | * | 142 | * |
143 | * Returns 0 if we managed to create more room. If we can't create more | 143 | * Returns 0 if we managed to create more room. If we can't create more |
144 | * room, and the transaction must be restarted we return 1. | 144 | * room, and the transaction must be restarted we return 1. |
145 | */ | 145 | */ |
146 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | 146 | static int try_to_extend_transaction(handle_t *handle, struct inode *inode) |
147 | { | 147 | { |
148 | if (!ext4_handle_valid(handle)) | 148 | if (!ext4_handle_valid(handle)) |
149 | return 0; | 149 | return 0; |
150 | if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) | 150 | if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) |
151 | return 0; | 151 | return 0; |
152 | if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) | 152 | if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) |
153 | return 0; | 153 | return 0; |
154 | return 1; | 154 | return 1; |
155 | } | 155 | } |
156 | 156 | ||
/*
 * Restart the transaction associated with *handle.  This does a commit,
 * so before we call here everything must be consistently dirtied against
 * this transaction.
 *
 * @handle:  journal handle to restart
 * @inode:   inode being truncated (its i_data_sem is dropped and retaken)
 * @nblocks: credit count for the restarted transaction
 *
 * Returns the result of ext4_journal_restart() (0 on success).
 */
int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
				int nblocks)
{
	int ret;

	/*
	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks.  At this
	 * moment, get_block can be called only for blocks inside i_size since
	 * page cache has been already dropped and writes are blocked by
	 * i_mutex.  So we can safely drop the i_data_sem here.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	jbd_debug(2, "restarting handle %p\n", handle);
	up_write(&EXT4_I(inode)->i_data_sem);
	ret = ext4_journal_restart(handle, nblocks);
	down_write(&EXT4_I(inode)->i_data_sem);
	/*
	 * NOTE(review): preallocations are discarded after re-taking
	 * i_data_sem — presumably because in-core reservation state may be
	 * stale after the lock was dropped; confirm against callers.
	 */
	ext4_discard_preallocations(inode);

	return ret;
}
182 | 182 | ||
/*
 * Called at the last iput() if i_nlink is zero: free the inode's data
 * blocks, remove it from the orphan list and release the on-disk inode.
 * If i_nlink is still non-zero (or anything goes wrong) we only drop the
 * page cache and clear the in-core inode.
 */
void ext4_evict_inode(struct inode *inode)
{
	handle_t *handle;
	int err;

	trace_ext4_evict_inode(inode);
	if (inode->i_nlink) {
		/* Still linked: nothing to delete, just drop cached pages. */
		truncate_inode_pages(&inode->i_data, 0);
		goto no_delete;
	}

	if (!is_bad_inode(inode))
		dquot_initialize(inode);

	/* In ordered mode, flush ordered data before truncating to zero. */
	if (ext4_should_order_data(inode))
		ext4_begin_ordered_truncate(inode, 0);
	truncate_inode_pages(&inode->i_data, 0);

	if (is_bad_inode(inode))
		goto no_delete;

	/* +3 extra credits: orphan removal and dtime update after truncate. */
	handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
	if (IS_ERR(handle)) {
		ext4_std_error(inode->i_sb, PTR_ERR(handle));
		/*
		 * If we're going to skip the normal cleanup, we still need to
		 * make sure that the in-core orphan linked list is properly
		 * cleaned up.
		 */
		ext4_orphan_del(NULL, inode);
		goto no_delete;
	}

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);
	inode->i_size = 0;
	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
		ext4_warning(inode->i_sb,
			     "couldn't mark inode dirty (err %d)", err);
		goto stop_handle;
	}
	if (inode->i_blocks)
		ext4_truncate(inode);

	/*
	 * ext4_ext_truncate() doesn't reserve any slop when it
	 * restarts journal transactions; therefore there may not be
	 * enough credits left in the handle to remove the inode from
	 * the orphan list and set the dtime field.
	 */
	if (!ext4_handle_has_enough_credits(handle, 3)) {
		err = ext4_journal_extend(handle, 3);
		if (err > 0)
			err = ext4_journal_restart(handle, 3);
		if (err != 0) {
			ext4_warning(inode->i_sb,
				     "couldn't extend journal (err %d)", err);
			/* Shared bail-out path, also reached from the
			 * mark_inode_dirty failure above. */
		stop_handle:
			ext4_journal_stop(handle);
			ext4_orphan_del(NULL, inode);
			goto no_delete;
		}
	}

	/*
	 * Kill off the orphan record which ext4_truncate created.
	 * AKPM: I think this can be inside the above `if'.
	 * Note that ext4_orphan_del() has to be able to cope with the
	 * deletion of a non-existent orphan - this is because we don't
	 * know if ext4_truncate() actually created an orphan record.
	 * (Well, we could do this if we need to, but heck - it works)
	 */
	ext4_orphan_del(handle, inode);
	EXT4_I(inode)->i_dtime = get_seconds();

	/*
	 * One subtle ordering requirement: if anything has gone wrong
	 * (transaction abort, IO errors, whatever), then we can still
	 * do these next steps (the fs will already have been marked as
	 * having errors), but we can't free the inode if the mark_dirty
	 * fails.
	 */
	if (ext4_mark_inode_dirty(handle, inode))
		/* If that failed, just do the required in-core inode clear. */
		ext4_clear_inode(inode);
	else
		ext4_free_inode(handle, inode);
	ext4_journal_stop(handle);
	return;
no_delete:
	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
}
279 | 279 | ||
/*
 * One step in a chain of indirect-block lookups: where the block pointer
 * lives, the value it held when read, and the buffer that hosts it.
 */
typedef struct {
	__le32	*p;	/* address of the block-number slot */
	__le32	key;	/* snapshot of *p taken when the chain was built */
	struct buffer_head *bh;	/* buffer holding p (NULL for in-inode slots) */
} Indirect;
285 | 285 | ||
286 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) | 286 | static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) |
287 | { | 287 | { |
288 | p->key = *(p->p = v); | 288 | p->key = *(p->p = v); |
289 | p->bh = bh; | 289 | p->bh = bh; |
290 | } | 290 | } |
291 | 291 | ||
292 | /** | 292 | /** |
293 | * ext4_block_to_path - parse the block number into array of offsets | 293 | * ext4_block_to_path - parse the block number into array of offsets |
294 | * @inode: inode in question (we are only interested in its superblock) | 294 | * @inode: inode in question (we are only interested in its superblock) |
295 | * @i_block: block number to be parsed | 295 | * @i_block: block number to be parsed |
296 | * @offsets: array to store the offsets in | 296 | * @offsets: array to store the offsets in |
297 | * @boundary: set this non-zero if the referred-to block is likely to be | 297 | * @boundary: set this non-zero if the referred-to block is likely to be |
298 | * followed (on disk) by an indirect block. | 298 | * followed (on disk) by an indirect block. |
299 | * | 299 | * |
300 | * To store the locations of file's data ext4 uses a data structure common | 300 | * To store the locations of file's data ext4 uses a data structure common |
301 | * for UNIX filesystems - tree of pointers anchored in the inode, with | 301 | * for UNIX filesystems - tree of pointers anchored in the inode, with |
302 | * data blocks at leaves and indirect blocks in intermediate nodes. | 302 | * data blocks at leaves and indirect blocks in intermediate nodes. |
303 | * This function translates the block number into path in that tree - | 303 | * This function translates the block number into path in that tree - |
304 | * return value is the path length and @offsets[n] is the offset of | 304 | * return value is the path length and @offsets[n] is the offset of |
305 | * pointer to (n+1)th node in the nth one. If @block is out of range | 305 | * pointer to (n+1)th node in the nth one. If @block is out of range |
306 | * (negative or too large) warning is printed and zero returned. | 306 | * (negative or too large) warning is printed and zero returned. |
307 | * | 307 | * |
308 | * Note: function doesn't find node addresses, so no IO is needed. All | 308 | * Note: function doesn't find node addresses, so no IO is needed. All |
309 | * we need to know is the capacity of indirect blocks (taken from the | 309 | * we need to know is the capacity of indirect blocks (taken from the |
310 | * inode->i_sb). | 310 | * inode->i_sb). |
311 | */ | 311 | */ |
312 | 312 | ||
313 | /* | 313 | /* |
314 | * Portability note: the last comparison (check that we fit into triple | 314 | * Portability note: the last comparison (check that we fit into triple |
315 | * indirect block) is spelled differently, because otherwise on an | 315 | * indirect block) is spelled differently, because otherwise on an |
316 | * architecture with 32-bit longs and 8Kb pages we might get into trouble | 316 | * architecture with 32-bit longs and 8Kb pages we might get into trouble |
317 | * if our filesystem had 8Kb blocks. We might use long long, but that would | 317 | * if our filesystem had 8Kb blocks. We might use long long, but that would |
318 | * kill us on x86. Oh, well, at least the sign propagation does not matter - | 318 | * kill us on x86. Oh, well, at least the sign propagation does not matter - |
319 | * i_block would have to be negative in the very beginning, so we would not | 319 | * i_block would have to be negative in the very beginning, so we would not |
320 | * get there at all. | 320 | * get there at all. |
321 | */ | 321 | */ |
322 | 322 | ||
323 | static int ext4_block_to_path(struct inode *inode, | 323 | static int ext4_block_to_path(struct inode *inode, |
324 | ext4_lblk_t i_block, | 324 | ext4_lblk_t i_block, |
325 | ext4_lblk_t offsets[4], int *boundary) | 325 | ext4_lblk_t offsets[4], int *boundary) |
326 | { | 326 | { |
327 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 327 | int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
328 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); | 328 | int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); |
329 | const long direct_blocks = EXT4_NDIR_BLOCKS, | 329 | const long direct_blocks = EXT4_NDIR_BLOCKS, |
330 | indirect_blocks = ptrs, | 330 | indirect_blocks = ptrs, |
331 | double_blocks = (1 << (ptrs_bits * 2)); | 331 | double_blocks = (1 << (ptrs_bits * 2)); |
332 | int n = 0; | 332 | int n = 0; |
333 | int final = 0; | 333 | int final = 0; |
334 | 334 | ||
335 | if (i_block < direct_blocks) { | 335 | if (i_block < direct_blocks) { |
336 | offsets[n++] = i_block; | 336 | offsets[n++] = i_block; |
337 | final = direct_blocks; | 337 | final = direct_blocks; |
338 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 338 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
339 | offsets[n++] = EXT4_IND_BLOCK; | 339 | offsets[n++] = EXT4_IND_BLOCK; |
340 | offsets[n++] = i_block; | 340 | offsets[n++] = i_block; |
341 | final = ptrs; | 341 | final = ptrs; |
342 | } else if ((i_block -= indirect_blocks) < double_blocks) { | 342 | } else if ((i_block -= indirect_blocks) < double_blocks) { |
343 | offsets[n++] = EXT4_DIND_BLOCK; | 343 | offsets[n++] = EXT4_DIND_BLOCK; |
344 | offsets[n++] = i_block >> ptrs_bits; | 344 | offsets[n++] = i_block >> ptrs_bits; |
345 | offsets[n++] = i_block & (ptrs - 1); | 345 | offsets[n++] = i_block & (ptrs - 1); |
346 | final = ptrs; | 346 | final = ptrs; |
347 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { | 347 | } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { |
348 | offsets[n++] = EXT4_TIND_BLOCK; | 348 | offsets[n++] = EXT4_TIND_BLOCK; |
349 | offsets[n++] = i_block >> (ptrs_bits * 2); | 349 | offsets[n++] = i_block >> (ptrs_bits * 2); |
350 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); | 350 | offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); |
351 | offsets[n++] = i_block & (ptrs - 1); | 351 | offsets[n++] = i_block & (ptrs - 1); |
352 | final = ptrs; | 352 | final = ptrs; |
353 | } else { | 353 | } else { |
354 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", | 354 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", |
355 | i_block + direct_blocks + | 355 | i_block + direct_blocks + |
356 | indirect_blocks + double_blocks, inode->i_ino); | 356 | indirect_blocks + double_blocks, inode->i_ino); |
357 | } | 357 | } |
358 | if (boundary) | 358 | if (boundary) |
359 | *boundary = final - 1 - (i_block & (ptrs - 1)); | 359 | *boundary = final - 1 - (i_block & (ptrs - 1)); |
360 | return n; | 360 | return n; |
361 | } | 361 | } |
362 | 362 | ||
363 | static int __ext4_check_blockref(const char *function, unsigned int line, | 363 | static int __ext4_check_blockref(const char *function, unsigned int line, |
364 | struct inode *inode, | 364 | struct inode *inode, |
365 | __le32 *p, unsigned int max) | 365 | __le32 *p, unsigned int max) |
366 | { | 366 | { |
367 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 367 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
368 | __le32 *bref = p; | 368 | __le32 *bref = p; |
369 | unsigned int blk; | 369 | unsigned int blk; |
370 | 370 | ||
371 | while (bref < p+max) { | 371 | while (bref < p+max) { |
372 | blk = le32_to_cpu(*bref++); | 372 | blk = le32_to_cpu(*bref++); |
373 | if (blk && | 373 | if (blk && |
374 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 374 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
375 | blk, 1))) { | 375 | blk, 1))) { |
376 | es->s_last_error_block = cpu_to_le64(blk); | 376 | es->s_last_error_block = cpu_to_le64(blk); |
377 | ext4_error_inode(inode, function, line, blk, | 377 | ext4_error_inode(inode, function, line, blk, |
378 | "invalid block"); | 378 | "invalid block"); |
379 | return -EIO; | 379 | return -EIO; |
380 | } | 380 | } |
381 | } | 381 | } |
382 | return 0; | 382 | return 0; |
383 | } | 383 | } |
384 | 384 | ||
385 | 385 | ||
/* Validate every block reference held in an indirect block's buffer. */
#define ext4_check_indirect_blockref(inode, bh)                         \
	__ext4_check_blockref(__func__, __LINE__, inode,                \
			      (__le32 *)(bh)->b_data,                   \
			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))

/* Validate the direct block references stored in the inode body. */
#define ext4_check_inode_blockref(inode)                                \
	__ext4_check_blockref(__func__, __LINE__, inode,                \
			      EXT4_I(inode)->i_data,                    \
			      EXT4_NDIR_BLOCKS)
395 | 395 | ||
/**
 * ext4_get_branch - read the chain of indirect blocks leading to data
 * @inode: inode in question
 * @depth: depth of the chain (1 - direct pointer, etc.)
 * @offsets: offsets of pointers in inode/indirect blocks
 * @chain: place to store the result
 * @err: here we store the error value
 *
 * Function fills the array of triples <key, p, bh> and returns %NULL
 * if everything went OK or the pointer to the last filled triple
 * (incomplete one) otherwise. Upon the return chain[i].key contains
 * the number of (i+1)-th block in the chain (as it is stored in memory,
 * i.e. little-endian 32-bit), chain[i].p contains the address of that
 * number (it points into struct inode for i==0 and into the bh->b_data
 * for i>0) and chain[i].bh points to the buffer_head of i-th indirect
 * block for i>0 and NULL for i==0. In other words, it holds the block
 * numbers of the chain, addresses they were taken from (and where we can
 * verify that chain did not change) and buffer_heads hosting these
 * numbers.
 *
 * Function stops when it stumbles upon zero pointer (absent block)
 * (pointer to last triple returned, *@err == 0)
 * or when it gets an IO error reading an indirect block
 * (ditto, *@err == -EIO)
 * or when it reads all @depth-1 indirect blocks successfully and finds
 * the whole chain, all way to the data (returns %NULL, *err == 0).
 *
 * Need to be called with
 * down_read(&EXT4_I(inode)->i_data_sem)
 */
static Indirect *ext4_get_branch(struct inode *inode, int depth,
				 ext4_lblk_t *offsets,
				 Indirect chain[4], int *err)
{
	struct super_block *sb = inode->i_sb;
	Indirect *p = chain;
	struct buffer_head *bh;

	*err = 0;
	/* i_data is not going away, no lock needed */
	add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
	if (!p->key)
		goto no_block;
	/* Walk the remaining depth-1 indirect levels, reading each block. */
	while (--depth) {
		bh = sb_getblk(sb, le32_to_cpu(p->key));
		/* sb_getblk() failure is reported to the caller as -EIO */
		if (unlikely(!bh))
			goto failure;

		if (!bh_uptodate_or_lock(bh)) {
			/* bh_submit_read() unlocks the buffer on completion */
			if (bh_submit_read(bh) < 0) {
				put_bh(bh);
				goto failure;
			}
			/* validate block references */
			if (ext4_check_indirect_blockref(inode, bh)) {
				put_bh(bh);
				goto failure;
			}
		}

		/* Record this level; the bh reference is kept in the chain. */
		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
		/* Reader: end */
		if (!p->key)
			goto no_block;
	}
	return NULL;

failure:
	*err = -EIO;
no_block:
	/* Return the last (incomplete) triple; caller inspects *err. */
	return p;
}
468 | 468 | ||
469 | /** | 469 | /** |
470 | * ext4_find_near - find a place for allocation with sufficient locality | 470 | * ext4_find_near - find a place for allocation with sufficient locality |
471 | * @inode: owner | 471 | * @inode: owner |
472 | * @ind: descriptor of indirect block. | 472 | * @ind: descriptor of indirect block. |
473 | * | 473 | * |
474 | * This function returns the preferred place for block allocation. | 474 | * This function returns the preferred place for block allocation. |
475 | * It is used when heuristic for sequential allocation fails. | 475 | * It is used when heuristic for sequential allocation fails. |
476 | * Rules are: | 476 | * Rules are: |
477 | * + if there is a block to the left of our position - allocate near it. | 477 | * + if there is a block to the left of our position - allocate near it. |
478 | * + if pointer will live in indirect block - allocate near that block. | 478 | * + if pointer will live in indirect block - allocate near that block. |
479 | * + if pointer will live in inode - allocate in the same | 479 | * + if pointer will live in inode - allocate in the same |
480 | * cylinder group. | 480 | * cylinder group. |
481 | * | 481 | * |
482 | * In the latter case we colour the starting block by the callers PID to | 482 | * In the latter case we colour the starting block by the callers PID to |
483 | * prevent it from clashing with concurrent allocations for a different inode | 483 | * prevent it from clashing with concurrent allocations for a different inode |
484 | * in the same block group. The PID is used here so that functionally related | 484 | * in the same block group. The PID is used here so that functionally related |
485 | * files will be close-by on-disk. | 485 | * files will be close-by on-disk. |
486 | * | 486 | * |
487 | * Caller must make sure that @ind is valid and will stay that way. | 487 | * Caller must make sure that @ind is valid and will stay that way. |
488 | */ | 488 | */ |
489 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 489 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
490 | { | 490 | { |
491 | struct ext4_inode_info *ei = EXT4_I(inode); | 491 | struct ext4_inode_info *ei = EXT4_I(inode); |
492 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; | 492 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
493 | __le32 *p; | 493 | __le32 *p; |
494 | ext4_fsblk_t bg_start; | 494 | ext4_fsblk_t bg_start; |
495 | ext4_fsblk_t last_block; | 495 | ext4_fsblk_t last_block; |
496 | ext4_grpblk_t colour; | 496 | ext4_grpblk_t colour; |
497 | ext4_group_t block_group; | 497 | ext4_group_t block_group; |
498 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); | 498 | int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb)); |
499 | 499 | ||
500 | /* Try to find previous block */ | 500 | /* Try to find previous block */ |
501 | for (p = ind->p - 1; p >= start; p--) { | 501 | for (p = ind->p - 1; p >= start; p--) { |
502 | if (*p) | 502 | if (*p) |
503 | return le32_to_cpu(*p); | 503 | return le32_to_cpu(*p); |
504 | } | 504 | } |
505 | 505 | ||
506 | /* No such thing, so let's try location of indirect block */ | 506 | /* No such thing, so let's try location of indirect block */ |
507 | if (ind->bh) | 507 | if (ind->bh) |
508 | return ind->bh->b_blocknr; | 508 | return ind->bh->b_blocknr; |
509 | 509 | ||
510 | /* | 510 | /* |
511 | * It is going to be referred to from the inode itself? OK, just put it | 511 | * It is going to be referred to from the inode itself? OK, just put it |
512 | * into the same cylinder group then. | 512 | * into the same cylinder group then. |
513 | */ | 513 | */ |
514 | block_group = ei->i_block_group; | 514 | block_group = ei->i_block_group; |
515 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | 515 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { |
516 | block_group &= ~(flex_size-1); | 516 | block_group &= ~(flex_size-1); |
517 | if (S_ISREG(inode->i_mode)) | 517 | if (S_ISREG(inode->i_mode)) |
518 | block_group++; | 518 | block_group++; |
519 | } | 519 | } |
520 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); | 520 | bg_start = ext4_group_first_block_no(inode->i_sb, block_group); |
521 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; | 521 | last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * If we are doing delayed allocation, we don't need take | 524 | * If we are doing delayed allocation, we don't need take |
525 | * colour into account. | 525 | * colour into account. |
526 | */ | 526 | */ |
527 | if (test_opt(inode->i_sb, DELALLOC)) | 527 | if (test_opt(inode->i_sb, DELALLOC)) |
528 | return bg_start; | 528 | return bg_start; |
529 | 529 | ||
530 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) | 530 | if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block) |
531 | colour = (current->pid % 16) * | 531 | colour = (current->pid % 16) * |
532 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); | 532 | (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); |
533 | else | 533 | else |
534 | colour = (current->pid % 16) * ((last_block - bg_start) / 16); | 534 | colour = (current->pid % 16) * ((last_block - bg_start) / 16); |
535 | return bg_start + colour; | 535 | return bg_start + colour; |
536 | } | 536 | } |
537 | 537 | ||
538 | /** | 538 | /** |
539 | * ext4_find_goal - find a preferred place for allocation. | 539 | * ext4_find_goal - find a preferred place for allocation. |
540 | * @inode: owner | 540 | * @inode: owner |
541 | * @block: block we want | 541 | * @block: block we want |
542 | * @partial: pointer to the last triple within a chain | 542 | * @partial: pointer to the last triple within a chain |
543 | * | 543 | * |
544 | * Normally this function find the preferred place for block allocation, | 544 | * Normally this function find the preferred place for block allocation, |
545 | * returns it. | 545 | * returns it. |
546 | * Because this is only used for non-extent files, we limit the block nr | 546 | * Because this is only used for non-extent files, we limit the block nr |
547 | * to 32 bits. | 547 | * to 32 bits. |
548 | */ | 548 | */ |
549 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 549 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
550 | Indirect *partial) | 550 | Indirect *partial) |
551 | { | 551 | { |
552 | ext4_fsblk_t goal; | 552 | ext4_fsblk_t goal; |
553 | 553 | ||
554 | /* | 554 | /* |
555 | * XXX need to get goal block from mballoc's data structures | 555 | * XXX need to get goal block from mballoc's data structures |
556 | */ | 556 | */ |
557 | 557 | ||
558 | goal = ext4_find_near(inode, partial); | 558 | goal = ext4_find_near(inode, partial); |
559 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 559 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
560 | return goal; | 560 | return goal; |
561 | } | 561 | } |
562 | 562 | ||
563 | /** | 563 | /** |
564 | * ext4_blks_to_allocate - Look up the block map and count the number | 564 | * ext4_blks_to_allocate - Look up the block map and count the number |
565 | * of direct blocks need to be allocated for the given branch. | 565 | * of direct blocks need to be allocated for the given branch. |
566 | * | 566 | * |
567 | * @branch: chain of indirect blocks | 567 | * @branch: chain of indirect blocks |
568 | * @k: number of blocks need for indirect blocks | 568 | * @k: number of blocks need for indirect blocks |
569 | * @blks: number of data blocks to be mapped. | 569 | * @blks: number of data blocks to be mapped. |
570 | * @blocks_to_boundary: the offset in the indirect block | 570 | * @blocks_to_boundary: the offset in the indirect block |
571 | * | 571 | * |
572 | * return the total number of blocks to be allocate, including the | 572 | * return the total number of blocks to be allocate, including the |
573 | * direct and indirect blocks. | 573 | * direct and indirect blocks. |
574 | */ | 574 | */ |
575 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | 575 | static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, |
576 | int blocks_to_boundary) | 576 | int blocks_to_boundary) |
577 | { | 577 | { |
578 | unsigned int count = 0; | 578 | unsigned int count = 0; |
579 | 579 | ||
580 | /* | 580 | /* |
581 | * Simple case, [t,d]Indirect block(s) has not allocated yet | 581 | * Simple case, [t,d]Indirect block(s) has not allocated yet |
582 | * then it's clear blocks on that path have not allocated | 582 | * then it's clear blocks on that path have not allocated |
583 | */ | 583 | */ |
584 | if (k > 0) { | 584 | if (k > 0) { |
585 | /* right now we don't handle cross boundary allocation */ | 585 | /* right now we don't handle cross boundary allocation */ |
586 | if (blks < blocks_to_boundary + 1) | 586 | if (blks < blocks_to_boundary + 1) |
587 | count += blks; | 587 | count += blks; |
588 | else | 588 | else |
589 | count += blocks_to_boundary + 1; | 589 | count += blocks_to_boundary + 1; |
590 | return count; | 590 | return count; |
591 | } | 591 | } |
592 | 592 | ||
593 | count++; | 593 | count++; |
594 | while (count < blks && count <= blocks_to_boundary && | 594 | while (count < blks && count <= blocks_to_boundary && |
595 | le32_to_cpu(*(branch[0].p + count)) == 0) { | 595 | le32_to_cpu(*(branch[0].p + count)) == 0) { |
596 | count++; | 596 | count++; |
597 | } | 597 | } |
598 | return count; | 598 | return count; |
599 | } | 599 | } |
600 | 600 | ||
/**
 * ext4_alloc_blocks: multiple allocate blocks needed for a branch
 * @handle: handle for this transaction
 * @inode: inode which needs allocated blocks
 * @iblock: the logical block to start allocated at
 * @goal: preferred physical block of allocation
 * @indirect_blks: the number of blocks need to allocate for indirect
 *			blocks
 * @blks: number of desired blocks
 * @new_blocks: on return it will store the new block numbers for
 *	the indirect blocks(if needed) and the first direct block,
 * @err: on return it will store the error code
 *
 * This function will return the number of blocks allocated as
 * requested by the passed-in parameters.  Allocation is best-effort:
 * the caller may get fewer direct blocks than @blks, but on success
 * all @indirect_blks metadata blocks plus at least one direct block
 * have been allocated.
 */
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
			     ext4_lblk_t iblock, ext4_fsblk_t goal,
			     int indirect_blks, int blks,
			     ext4_fsblk_t new_blocks[4], int *err)
{
	struct ext4_allocation_request ar;
	int target, i;
	unsigned long count = 0, blk_allocated = 0;
	int index = 0;	/* how many entries of new_blocks[] are filled */
	ext4_fsblk_t current_block = 0;
	int ret = 0;

	/*
	 * Here we try to allocate the requested multiple blocks at once,
	 * on a best-effort basis.
	 * To build a branch, we should allocate blocks for
	 * the indirect blocks(if not allocated yet), and at least
	 * the first direct block of this branch.  That's the
	 * minimum number of blocks need to allocate(required)
	 */
	/* first we try to allocate the indirect blocks */
	target = indirect_blks;
	while (target > 0) {
		count = target;
		/* allocating blocks for indirect blocks and direct blocks */
		current_block = ext4_new_meta_blocks(handle, inode, goal,
						     0, &count, err);
		if (*err)
			goto failed_out;

		/*
		 * Block-mapped files can only address 32-bit physical
		 * block numbers; anything above that is corruption.
		 */
		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
			EXT4_ERROR_INODE(inode,
					 "current_block %llu + count %lu > %d!",
					 current_block, count,
					 EXT4_MAX_BLOCK_FILE_PHYS);
			*err = -EIO;
			goto failed_out;
		}

		target -= count;
		/* allocate blocks for indirect blocks */
		while (index < indirect_blks && count) {
			new_blocks[index++] = current_block++;
			count--;
		}
		if (count > 0) {
			/*
			 * save the new block number
			 * for the first direct block
			 */
			new_blocks[index] = current_block;
			/* allocator handed back more than we asked for */
			printk(KERN_INFO "%s returned more blocks than "
						"requested\n", __func__);
			WARN_ON(1);
			break;
		}
	}

	/* `count' direct blocks (if any) came from the metadata request. */
	target = blks - count ;
	blk_allocated = count;
	if (!target)
		goto allocated;
	/* Now allocate data blocks */
	memset(&ar, 0, sizeof(ar));
	ar.inode = inode;
	ar.goal = goal;
	ar.len = target;
	ar.logical = iblock;
	if (S_ISREG(inode->i_mode))
		/* enable in-core preallocation only for regular files */
		ar.flags = EXT4_MB_HINT_DATA;

	current_block = ext4_mb_new_blocks(handle, &ar, err);
	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
		EXT4_ERROR_INODE(inode,
				 "current_block %llu + ar.len %d > %d!",
				 current_block, ar.len,
				 EXT4_MAX_BLOCK_FILE_PHYS);
		*err = -EIO;
		goto failed_out;
	}

	if (*err && (target == blks)) {
		/*
		 * if the allocation failed and we didn't allocate
		 * any blocks before
		 */
		goto failed_out;
	}
	if (!*err) {
		if (target == blks) {
			/*
			 * save the new block number
			 * for the first direct block
			 */
			new_blocks[index] = current_block;
		}
		blk_allocated += ar.len;
	}
allocated:
	/* total number of blocks allocated for direct blocks */
	ret = blk_allocated;
	*err = 0;
	return ret;
failed_out:
	/* Roll back only the indirect blocks recorded so far. */
	for (i = 0; i < index; i++)
		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
	return ret;
}
726 | 726 | ||
/**
 * ext4_alloc_branch - allocate and set up a chain of blocks.
 * @handle: handle for this transaction
 * @inode: owner
 * @indirect_blks: number of allocated indirect blocks
 * @blks: number of allocated direct blocks
 * @goal: preferred place for allocation
 * @offsets: offsets (in the blocks) to store the pointers to next.
 * @branch: place to store the chain in.
 *
 * This function allocates blocks, zeroes out all but the last one,
 * links them into chain and (if we are synchronous) writes them to disk.
 * In other words, it prepares a branch that can be spliced onto the
 * inode. It stores the information about that chain in the branch[], in
 * the same format as ext4_get_branch() would do. We are calling it after
 * we had read the existing part of chain and partial points to the last
 * triple of that (one with zero ->key). Upon the exit we have the same
 * picture as after the successful ext4_get_block(), except that in one
 * place chain is disconnected - *branch->p is still zero (we did not
 * set the last link), but branch->key contains the number that should
 * be placed into *branch->p to fill that gap.
 *
 * If allocation fails we free all blocks we've allocated (and forget
 * their buffer_heads) and return the error value the from failed
 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
 * as described above and return 0.
 */
static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
			     ext4_lblk_t iblock, int indirect_blks,
			     int *blks, ext4_fsblk_t goal,
			     ext4_lblk_t *offsets, Indirect *branch)
{
	int blocksize = inode->i_sb->s_blocksize;
	int i, n = 0;
	int err = 0;
	struct buffer_head *bh;
	int num;
	ext4_fsblk_t new_blocks[4];
	ext4_fsblk_t current_block;

	/* num = number of direct blocks actually obtained (may be < *blks) */
	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
				*blks, new_blocks, &err);
	if (err)
		return err;

	branch[0].key = cpu_to_le32(new_blocks[0]);
	/*
	 * metadata blocks and data blocks are allocated.
	 */
	for (n = 1; n <= indirect_blks; n++) {
		/*
		 * Get buffer_head for parent block, zero it out
		 * and set the pointer to new one, then send
		 * parent to disk.
		 */
		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
		if (unlikely(!bh)) {
			err = -EIO;
			goto failed;
		}

		branch[n].bh = bh;
		lock_buffer(bh);
		BUFFER_TRACE(bh, "call get_create_access");
		err = ext4_journal_get_create_access(handle, bh);
		if (err) {
			/* Don't brelse(bh) here; it's done in
			 * ext4_journal_forget() below */
			unlock_buffer(bh);
			goto failed;
		}

		memset(bh->b_data, 0, blocksize);
		branch[n].p = (__le32 *) bh->b_data + offsets[n];
		branch[n].key = cpu_to_le32(new_blocks[n]);
		*branch[n].p = branch[n].key;
		if (n == indirect_blks) {
			current_block = new_blocks[n];
			/*
			 * End of chain, update the last new metablock of
			 * the chain to point to the new allocated
			 * data blocks numbers
			 */
			for (i = 1; i < num; i++)
				*(branch[n].p + i) = cpu_to_le32(++current_block);
		}
		BUFFER_TRACE(bh, "marking uptodate");
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto failed;
	}
	*blks = num;
	return err;
failed:
	/* Allocation failed, free what we already allocated */
	/* new_blocks[0] was never journalled here, so plain free is enough */
	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
	for (i = 1; i <= n ; i++) {
		/*
		 * branch[i].bh is newly allocated, so there is no
		 * need to revoke the block, which is why we don't
		 * need to set EXT4_FREE_BLOCKS_METADATA.
		 */
		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
				 EXT4_FREE_BLOCKS_FORGET);
	}
	/* free the indirect blocks we never got to */
	for (i = n+1; i < indirect_blks; i++)
		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);

	/*
	 * NOTE(review): this relies on the loop above leaving
	 * i == indirect_blks so that new_blocks[i] is the first direct
	 * block; when the failure happened at n == indirect_blks the
	 * loop does not run and i == indirect_blks + 1, which indexes
	 * past the slot holding the direct block — verify this unwind
	 * path.  Freeing `num' blocks also assumes the direct run is
	 * contiguous from new_blocks[i] — confirm against the allocator.
	 */
	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);

	return err;
}
843 | 843 | ||
844 | /** | 844 | /** |
845 | * ext4_splice_branch - splice the allocated branch onto inode. | 845 | * ext4_splice_branch - splice the allocated branch onto inode. |
846 | * @handle: handle for this transaction | 846 | * @handle: handle for this transaction |
847 | * @inode: owner | 847 | * @inode: owner |
848 | * @block: (logical) number of block we are adding | 848 | * @block: (logical) number of block we are adding |
849 | * @chain: chain of indirect blocks (with a missing link - see | 849 | * @chain: chain of indirect blocks (with a missing link - see |
850 | * ext4_alloc_branch) | 850 | * ext4_alloc_branch) |
851 | * @where: location of missing link | 851 | * @where: location of missing link |
852 | * @num: number of indirect blocks we are adding | 852 | * @num: number of indirect blocks we are adding |
853 | * @blks: number of direct blocks we are adding | 853 | * @blks: number of direct blocks we are adding |
854 | * | 854 | * |
855 | * This function fills the missing link and does all housekeeping needed in | 855 | * This function fills the missing link and does all housekeeping needed in |
856 | * inode (->i_blocks, etc.). In case of success we end up with the full | 856 | * inode (->i_blocks, etc.). In case of success we end up with the full |
857 | * chain to new block and return 0. | 857 | * chain to new block and return 0. |
858 | */ | 858 | */ |
859 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, | 859 | static int ext4_splice_branch(handle_t *handle, struct inode *inode, |
860 | ext4_lblk_t block, Indirect *where, int num, | 860 | ext4_lblk_t block, Indirect *where, int num, |
861 | int blks) | 861 | int blks) |
862 | { | 862 | { |
863 | int i; | 863 | int i; |
864 | int err = 0; | 864 | int err = 0; |
865 | ext4_fsblk_t current_block; | 865 | ext4_fsblk_t current_block; |
866 | 866 | ||
867 | /* | 867 | /* |
868 | * If we're splicing into a [td]indirect block (as opposed to the | 868 | * If we're splicing into a [td]indirect block (as opposed to the |
869 | * inode) then we need to get write access to the [td]indirect block | 869 | * inode) then we need to get write access to the [td]indirect block |
870 | * before the splice. | 870 | * before the splice. |
871 | */ | 871 | */ |
872 | if (where->bh) { | 872 | if (where->bh) { |
873 | BUFFER_TRACE(where->bh, "get_write_access"); | 873 | BUFFER_TRACE(where->bh, "get_write_access"); |
874 | err = ext4_journal_get_write_access(handle, where->bh); | 874 | err = ext4_journal_get_write_access(handle, where->bh); |
875 | if (err) | 875 | if (err) |
876 | goto err_out; | 876 | goto err_out; |
877 | } | 877 | } |
878 | /* That's it */ | 878 | /* That's it */ |
879 | 879 | ||
880 | *where->p = where->key; | 880 | *where->p = where->key; |
881 | 881 | ||
882 | /* | 882 | /* |
883 | * Update the host buffer_head or inode to point to more just allocated | 883 | * Update the host buffer_head or inode to point to more just allocated |
884 | * direct blocks blocks | 884 | * direct blocks blocks |
885 | */ | 885 | */ |
886 | if (num == 0 && blks > 1) { | 886 | if (num == 0 && blks > 1) { |
887 | current_block = le32_to_cpu(where->key) + 1; | 887 | current_block = le32_to_cpu(where->key) + 1; |
888 | for (i = 1; i < blks; i++) | 888 | for (i = 1; i < blks; i++) |
889 | *(where->p + i) = cpu_to_le32(current_block++); | 889 | *(where->p + i) = cpu_to_le32(current_block++); |
890 | } | 890 | } |
891 | 891 | ||
892 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 892 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
893 | /* had we spliced it onto indirect block? */ | 893 | /* had we spliced it onto indirect block? */ |
894 | if (where->bh) { | 894 | if (where->bh) { |
895 | /* | 895 | /* |
896 | * If we spliced it onto an indirect block, we haven't | 896 | * If we spliced it onto an indirect block, we haven't |
897 | * altered the inode. Note however that if it is being spliced | 897 | * altered the inode. Note however that if it is being spliced |
898 | * onto an indirect block at the very end of the file (the | 898 | * onto an indirect block at the very end of the file (the |
899 | * file is growing) then we *will* alter the inode to reflect | 899 | * file is growing) then we *will* alter the inode to reflect |
900 | * the new i_size. But that is not done here - it is done in | 900 | * the new i_size. But that is not done here - it is done in |
901 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. | 901 | * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. |
902 | */ | 902 | */ |
903 | jbd_debug(5, "splicing indirect only\n"); | 903 | jbd_debug(5, "splicing indirect only\n"); |
904 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); | 904 | BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); |
905 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); | 905 | err = ext4_handle_dirty_metadata(handle, inode, where->bh); |
906 | if (err) | 906 | if (err) |
907 | goto err_out; | 907 | goto err_out; |
908 | } else { | 908 | } else { |
909 | /* | 909 | /* |
910 | * OK, we spliced it into the inode itself on a direct block. | 910 | * OK, we spliced it into the inode itself on a direct block. |
911 | */ | 911 | */ |
912 | ext4_mark_inode_dirty(handle, inode); | 912 | ext4_mark_inode_dirty(handle, inode); |
913 | jbd_debug(5, "splicing direct\n"); | 913 | jbd_debug(5, "splicing direct\n"); |
914 | } | 914 | } |
915 | return err; | 915 | return err; |
916 | 916 | ||
917 | err_out: | 917 | err_out: |
918 | for (i = 1; i <= num; i++) { | 918 | for (i = 1; i <= num; i++) { |
919 | /* | 919 | /* |
920 | * branch[i].bh is newly allocated, so there is no | 920 | * branch[i].bh is newly allocated, so there is no |
921 | * need to revoke the block, which is why we don't | 921 | * need to revoke the block, which is why we don't |
922 | * need to set EXT4_FREE_BLOCKS_METADATA. | 922 | * need to set EXT4_FREE_BLOCKS_METADATA. |
923 | */ | 923 | */ |
924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, |
925 | EXT4_FREE_BLOCKS_FORGET); | 925 | EXT4_FREE_BLOCKS_FORGET); |
926 | } | 926 | } |
927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), | 927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), |
928 | blks, 0); | 928 | blks, 0); |
929 | 929 | ||
930 | return err; | 930 | return err; |
931 | } | 931 | } |
932 | 932 | ||
933 | /* | 933 | /* |
934 | * The ext4_ind_map_blocks() function handles non-extents inodes | 934 | * The ext4_ind_map_blocks() function handles non-extents inodes |
935 | * (i.e., using the traditional indirect/double-indirect i_blocks | 935 | * (i.e., using the traditional indirect/double-indirect i_blocks |
936 | * scheme) for ext4_map_blocks(). | 936 | * scheme) for ext4_map_blocks(). |
937 | * | 937 | * |
938 | * Allocation strategy is simple: if we have to allocate something, we will | 938 | * Allocation strategy is simple: if we have to allocate something, we will |
939 | * have to go the whole way to leaf. So let's do it before attaching anything | 939 | * have to go the whole way to leaf. So let's do it before attaching anything |
940 | * to tree, set linkage between the newborn blocks, write them if sync is | 940 | * to tree, set linkage between the newborn blocks, write them if sync is |
941 | * required, recheck the path, free and repeat if check fails, otherwise | 941 | * required, recheck the path, free and repeat if check fails, otherwise |
942 | * set the last missing link (that will protect us from any truncate-generated | 942 | * set the last missing link (that will protect us from any truncate-generated |
943 | * removals - all blocks on the path are immune now) and possibly force the | 943 | * removals - all blocks on the path are immune now) and possibly force the |
944 | * write on the parent block. | 944 | * write on the parent block. |
945 | * That has a nice additional property: no special recovery from the failed | 945 | * That has a nice additional property: no special recovery from the failed |
946 | * allocations is needed - we simply release blocks and do not touch anything | 946 | * allocations is needed - we simply release blocks and do not touch anything |
947 | * reachable from inode. | 947 | * reachable from inode. |
948 | * | 948 | * |
949 | * `handle' can be NULL if create == 0. | 949 | * `handle' can be NULL if create == 0. |
950 | * | 950 | * |
951 | * return > 0, # of blocks mapped or allocated. | 951 | * return > 0, # of blocks mapped or allocated. |
952 | * return = 0, if plain lookup failed. | 952 | * return = 0, if plain lookup failed. |
953 | * return < 0, error case. | 953 | * return < 0, error case. |
954 | * | 954 | * |
955 | * The ext4_ind_get_blocks() function should be called with | 955 | * The ext4_ind_get_blocks() function should be called with |
956 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem | 956 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem |
957 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or | 957 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or |
958 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | 958 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
959 | * blocks. | 959 | * blocks. |
960 | */ | 960 | */ |
961 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 961 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
962 | struct ext4_map_blocks *map, | 962 | struct ext4_map_blocks *map, |
963 | int flags) | 963 | int flags) |
964 | { | 964 | { |
965 | int err = -EIO; | 965 | int err = -EIO; |
966 | ext4_lblk_t offsets[4]; | 966 | ext4_lblk_t offsets[4]; |
967 | Indirect chain[4]; | 967 | Indirect chain[4]; |
968 | Indirect *partial; | 968 | Indirect *partial; |
969 | ext4_fsblk_t goal; | 969 | ext4_fsblk_t goal; |
970 | int indirect_blks; | 970 | int indirect_blks; |
971 | int blocks_to_boundary = 0; | 971 | int blocks_to_boundary = 0; |
972 | int depth; | 972 | int depth; |
973 | int count = 0; | 973 | int count = 0; |
974 | ext4_fsblk_t first_block = 0; | 974 | ext4_fsblk_t first_block = 0; |
975 | 975 | ||
976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | 977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | 979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
980 | &blocks_to_boundary); | 980 | &blocks_to_boundary); |
981 | 981 | ||
982 | if (depth == 0) | 982 | if (depth == 0) |
983 | goto out; | 983 | goto out; |
984 | 984 | ||
985 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); | 985 | partial = ext4_get_branch(inode, depth, offsets, chain, &err); |
986 | 986 | ||
987 | /* Simplest case - block found, no allocation needed */ | 987 | /* Simplest case - block found, no allocation needed */ |
988 | if (!partial) { | 988 | if (!partial) { |
989 | first_block = le32_to_cpu(chain[depth - 1].key); | 989 | first_block = le32_to_cpu(chain[depth - 1].key); |
990 | count++; | 990 | count++; |
991 | /*map more blocks*/ | 991 | /*map more blocks*/ |
992 | while (count < map->m_len && count <= blocks_to_boundary) { | 992 | while (count < map->m_len && count <= blocks_to_boundary) { |
993 | ext4_fsblk_t blk; | 993 | ext4_fsblk_t blk; |
994 | 994 | ||
995 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 995 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
996 | 996 | ||
997 | if (blk == first_block + count) | 997 | if (blk == first_block + count) |
998 | count++; | 998 | count++; |
999 | else | 999 | else |
1000 | break; | 1000 | break; |
1001 | } | 1001 | } |
1002 | goto got_it; | 1002 | goto got_it; |
1003 | } | 1003 | } |
1004 | 1004 | ||
1005 | /* Next simple case - plain lookup or failed read of indirect block */ | 1005 | /* Next simple case - plain lookup or failed read of indirect block */ |
1006 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) | 1006 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) |
1007 | goto cleanup; | 1007 | goto cleanup; |
1008 | 1008 | ||
1009 | /* | 1009 | /* |
1010 | * Okay, we need to do block allocation. | 1010 | * Okay, we need to do block allocation. |
1011 | */ | 1011 | */ |
1012 | goal = ext4_find_goal(inode, map->m_lblk, partial); | 1012 | goal = ext4_find_goal(inode, map->m_lblk, partial); |
1013 | 1013 | ||
1014 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 1014 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
1015 | indirect_blks = (chain + depth) - partial - 1; | 1015 | indirect_blks = (chain + depth) - partial - 1; |
1016 | 1016 | ||
1017 | /* | 1017 | /* |
1018 | * Next look up the indirect map to count the totoal number of | 1018 | * Next look up the indirect map to count the totoal number of |
1019 | * direct blocks to allocate for this branch. | 1019 | * direct blocks to allocate for this branch. |
1020 | */ | 1020 | */ |
1021 | count = ext4_blks_to_allocate(partial, indirect_blks, | 1021 | count = ext4_blks_to_allocate(partial, indirect_blks, |
1022 | map->m_len, blocks_to_boundary); | 1022 | map->m_len, blocks_to_boundary); |
1023 | /* | 1023 | /* |
1024 | * Block out ext4_truncate while we alter the tree | 1024 | * Block out ext4_truncate while we alter the tree |
1025 | */ | 1025 | */ |
1026 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, | 1026 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, |
1027 | &count, goal, | 1027 | &count, goal, |
1028 | offsets + (partial - chain), partial); | 1028 | offsets + (partial - chain), partial); |
1029 | 1029 | ||
1030 | /* | 1030 | /* |
1031 | * The ext4_splice_branch call will free and forget any buffers | 1031 | * The ext4_splice_branch call will free and forget any buffers |
1032 | * on the new chain if there is a failure, but that risks using | 1032 | * on the new chain if there is a failure, but that risks using |
1033 | * up transaction credits, especially for bitmaps where the | 1033 | * up transaction credits, especially for bitmaps where the |
1034 | * credits cannot be returned. Can we handle this somehow? We | 1034 | * credits cannot be returned. Can we handle this somehow? We |
1035 | * may need to return -EAGAIN upwards in the worst case. --sct | 1035 | * may need to return -EAGAIN upwards in the worst case. --sct |
1036 | */ | 1036 | */ |
1037 | if (!err) | 1037 | if (!err) |
1038 | err = ext4_splice_branch(handle, inode, map->m_lblk, | 1038 | err = ext4_splice_branch(handle, inode, map->m_lblk, |
1039 | partial, indirect_blks, count); | 1039 | partial, indirect_blks, count); |
1040 | if (err) | 1040 | if (err) |
1041 | goto cleanup; | 1041 | goto cleanup; |
1042 | 1042 | ||
1043 | map->m_flags |= EXT4_MAP_NEW; | 1043 | map->m_flags |= EXT4_MAP_NEW; |
1044 | 1044 | ||
1045 | ext4_update_inode_fsync_trans(handle, inode, 1); | 1045 | ext4_update_inode_fsync_trans(handle, inode, 1); |
1046 | got_it: | 1046 | got_it: |
1047 | map->m_flags |= EXT4_MAP_MAPPED; | 1047 | map->m_flags |= EXT4_MAP_MAPPED; |
1048 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | 1048 | map->m_pblk = le32_to_cpu(chain[depth-1].key); |
1049 | map->m_len = count; | 1049 | map->m_len = count; |
1050 | if (count > blocks_to_boundary) | 1050 | if (count > blocks_to_boundary) |
1051 | map->m_flags |= EXT4_MAP_BOUNDARY; | 1051 | map->m_flags |= EXT4_MAP_BOUNDARY; |
1052 | err = count; | 1052 | err = count; |
1053 | /* Clean up and exit */ | 1053 | /* Clean up and exit */ |
1054 | partial = chain + depth - 1; /* the whole chain */ | 1054 | partial = chain + depth - 1; /* the whole chain */ |
1055 | cleanup: | 1055 | cleanup: |
1056 | while (partial > chain) { | 1056 | while (partial > chain) { |
1057 | BUFFER_TRACE(partial->bh, "call brelse"); | 1057 | BUFFER_TRACE(partial->bh, "call brelse"); |
1058 | brelse(partial->bh); | 1058 | brelse(partial->bh); |
1059 | partial--; | 1059 | partial--; |
1060 | } | 1060 | } |
1061 | out: | 1061 | out: |
1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | 1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, |
1063 | map->m_pblk, map->m_len, err); | 1063 | map->m_pblk, map->m_len, err); |
1064 | return err; | 1064 | return err; |
1065 | } | 1065 | } |
1066 | 1066 | ||
1067 | #ifdef CONFIG_QUOTA | 1067 | #ifdef CONFIG_QUOTA |
1068 | qsize_t *ext4_get_reserved_space(struct inode *inode) | 1068 | qsize_t *ext4_get_reserved_space(struct inode *inode) |
1069 | { | 1069 | { |
1070 | return &EXT4_I(inode)->i_reserved_quota; | 1070 | return &EXT4_I(inode)->i_reserved_quota; |
1071 | } | 1071 | } |
1072 | #endif | 1072 | #endif |
1073 | 1073 | ||
1074 | /* | 1074 | /* |
1075 | * Calculate the number of metadata blocks need to reserve | 1075 | * Calculate the number of metadata blocks need to reserve |
1076 | * to allocate a new block at @lblocks for non extent file based file | 1076 | * to allocate a new block at @lblocks for non extent file based file |
1077 | */ | 1077 | */ |
1078 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, | 1078 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, |
1079 | sector_t lblock) | 1079 | sector_t lblock) |
1080 | { | 1080 | { |
1081 | struct ext4_inode_info *ei = EXT4_I(inode); | 1081 | struct ext4_inode_info *ei = EXT4_I(inode); |
1082 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); | 1082 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); |
1083 | int blk_bits; | 1083 | int blk_bits; |
1084 | 1084 | ||
1085 | if (lblock < EXT4_NDIR_BLOCKS) | 1085 | if (lblock < EXT4_NDIR_BLOCKS) |
1086 | return 0; | 1086 | return 0; |
1087 | 1087 | ||
1088 | lblock -= EXT4_NDIR_BLOCKS; | 1088 | lblock -= EXT4_NDIR_BLOCKS; |
1089 | 1089 | ||
1090 | if (ei->i_da_metadata_calc_len && | 1090 | if (ei->i_da_metadata_calc_len && |
1091 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | 1091 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { |
1092 | ei->i_da_metadata_calc_len++; | 1092 | ei->i_da_metadata_calc_len++; |
1093 | return 0; | 1093 | return 0; |
1094 | } | 1094 | } |
1095 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | 1095 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; |
1096 | ei->i_da_metadata_calc_len = 1; | 1096 | ei->i_da_metadata_calc_len = 1; |
1097 | blk_bits = order_base_2(lblock); | 1097 | blk_bits = order_base_2(lblock); |
1098 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 1098 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | /* | 1101 | /* |
1102 | * Calculate the number of metadata blocks need to reserve | 1102 | * Calculate the number of metadata blocks need to reserve |
1103 | * to allocate a block located at @lblock | 1103 | * to allocate a block located at @lblock |
1104 | */ | 1104 | */ |
1105 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) | 1105 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
1106 | { | 1106 | { |
1107 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1107 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1108 | return ext4_ext_calc_metadata_amount(inode, lblock); | 1108 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1109 | 1109 | ||
1110 | return ext4_indirect_calc_metadata_amount(inode, lblock); | 1110 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
1111 | } | 1111 | } |
1112 | 1112 | ||
1113 | /* | 1113 | /* |
1114 | * Called with i_data_sem down, which is important since we can call | 1114 | * Called with i_data_sem down, which is important since we can call |
1115 | * ext4_discard_preallocations() from here. | 1115 | * ext4_discard_preallocations() from here. |
1116 | */ | 1116 | */ |
1117 | void ext4_da_update_reserve_space(struct inode *inode, | 1117 | void ext4_da_update_reserve_space(struct inode *inode, |
1118 | int used, int quota_claim) | 1118 | int used, int quota_claim) |
1119 | { | 1119 | { |
1120 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1120 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1121 | struct ext4_inode_info *ei = EXT4_I(inode); | 1121 | struct ext4_inode_info *ei = EXT4_I(inode); |
1122 | 1122 | ||
1123 | spin_lock(&ei->i_block_reservation_lock); | 1123 | spin_lock(&ei->i_block_reservation_lock); |
1124 | trace_ext4_da_update_reserve_space(inode, used); | 1124 | trace_ext4_da_update_reserve_space(inode, used); |
1125 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 1125 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
1126 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 1126 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
1127 | "with only %d reserved data blocks\n", | 1127 | "with only %d reserved data blocks\n", |
1128 | __func__, inode->i_ino, used, | 1128 | __func__, inode->i_ino, used, |
1129 | ei->i_reserved_data_blocks); | 1129 | ei->i_reserved_data_blocks); |
1130 | WARN_ON(1); | 1130 | WARN_ON(1); |
1131 | used = ei->i_reserved_data_blocks; | 1131 | used = ei->i_reserved_data_blocks; |
1132 | } | 1132 | } |
1133 | 1133 | ||
1134 | /* Update per-inode reservations */ | 1134 | /* Update per-inode reservations */ |
1135 | ei->i_reserved_data_blocks -= used; | 1135 | ei->i_reserved_data_blocks -= used; |
1136 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 1136 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
1137 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 1137 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1138 | used + ei->i_allocated_meta_blocks); | 1138 | used + ei->i_allocated_meta_blocks); |
1139 | ei->i_allocated_meta_blocks = 0; | 1139 | ei->i_allocated_meta_blocks = 0; |
1140 | 1140 | ||
1141 | if (ei->i_reserved_data_blocks == 0) { | 1141 | if (ei->i_reserved_data_blocks == 0) { |
1142 | /* | 1142 | /* |
1143 | * We can release all of the reserved metadata blocks | 1143 | * We can release all of the reserved metadata blocks |
1144 | * only when we have written all of the delayed | 1144 | * only when we have written all of the delayed |
1145 | * allocation blocks. | 1145 | * allocation blocks. |
1146 | */ | 1146 | */ |
1147 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 1147 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1148 | ei->i_reserved_meta_blocks); | 1148 | ei->i_reserved_meta_blocks); |
1149 | ei->i_reserved_meta_blocks = 0; | 1149 | ei->i_reserved_meta_blocks = 0; |
1150 | ei->i_da_metadata_calc_len = 0; | 1150 | ei->i_da_metadata_calc_len = 0; |
1151 | } | 1151 | } |
1152 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1152 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1153 | 1153 | ||
1154 | /* Update quota subsystem for data blocks */ | 1154 | /* Update quota subsystem for data blocks */ |
1155 | if (quota_claim) | 1155 | if (quota_claim) |
1156 | dquot_claim_block(inode, used); | 1156 | dquot_claim_block(inode, used); |
1157 | else { | 1157 | else { |
1158 | /* | 1158 | /* |
1159 | * We did fallocate with an offset that is already delayed | 1159 | * We did fallocate with an offset that is already delayed |
1160 | * allocated. So on delayed allocated writeback we should | 1160 | * allocated. So on delayed allocated writeback we should |
1161 | * not re-claim the quota for fallocated blocks. | 1161 | * not re-claim the quota for fallocated blocks. |
1162 | */ | 1162 | */ |
1163 | dquot_release_reservation_block(inode, used); | 1163 | dquot_release_reservation_block(inode, used); |
1164 | } | 1164 | } |
1165 | 1165 | ||
1166 | /* | 1166 | /* |
1167 | * If we have done all the pending block allocations and if | 1167 | * If we have done all the pending block allocations and if |
1168 | * there aren't any writers on the inode, we can discard the | 1168 | * there aren't any writers on the inode, we can discard the |
1169 | * inode's preallocations. | 1169 | * inode's preallocations. |
1170 | */ | 1170 | */ |
1171 | if ((ei->i_reserved_data_blocks == 0) && | 1171 | if ((ei->i_reserved_data_blocks == 0) && |
1172 | (atomic_read(&inode->i_writecount) == 0)) | 1172 | (atomic_read(&inode->i_writecount) == 0)) |
1173 | ext4_discard_preallocations(inode); | 1173 | ext4_discard_preallocations(inode); |
1174 | } | 1174 | } |
1175 | 1175 | ||
1176 | static int __check_block_validity(struct inode *inode, const char *func, | 1176 | static int __check_block_validity(struct inode *inode, const char *func, |
1177 | unsigned int line, | 1177 | unsigned int line, |
1178 | struct ext4_map_blocks *map) | 1178 | struct ext4_map_blocks *map) |
1179 | { | 1179 | { |
1180 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, | 1180 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1181 | map->m_len)) { | 1181 | map->m_len)) { |
1182 | ext4_error_inode(inode, func, line, map->m_pblk, | 1182 | ext4_error_inode(inode, func, line, map->m_pblk, |
1183 | "lblock %lu mapped to illegal pblock " | 1183 | "lblock %lu mapped to illegal pblock " |
1184 | "(length %d)", (unsigned long) map->m_lblk, | 1184 | "(length %d)", (unsigned long) map->m_lblk, |
1185 | map->m_len); | 1185 | map->m_len); |
1186 | return -EIO; | 1186 | return -EIO; |
1187 | } | 1187 | } |
1188 | return 0; | 1188 | return 0; |
1189 | } | 1189 | } |
1190 | 1190 | ||
1191 | #define check_block_validity(inode, map) \ | 1191 | #define check_block_validity(inode, map) \ |
1192 | __check_block_validity((inode), __func__, __LINE__, (map)) | 1192 | __check_block_validity((inode), __func__, __LINE__, (map)) |
1193 | 1193 | ||
1194 | /* | 1194 | /* |
1195 | * Return the number of contiguous dirty pages in a given inode | 1195 | * Return the number of contiguous dirty pages in a given inode |
1196 | * starting at page frame idx. | 1196 | * starting at page frame idx. |
1197 | */ | 1197 | */ |
1198 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | 1198 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, |
1199 | unsigned int max_pages) | 1199 | unsigned int max_pages) |
1200 | { | 1200 | { |
1201 | struct address_space *mapping = inode->i_mapping; | 1201 | struct address_space *mapping = inode->i_mapping; |
1202 | pgoff_t index; | 1202 | pgoff_t index; |
1203 | struct pagevec pvec; | 1203 | struct pagevec pvec; |
1204 | pgoff_t num = 0; | 1204 | pgoff_t num = 0; |
1205 | int i, nr_pages, done = 0; | 1205 | int i, nr_pages, done = 0; |
1206 | 1206 | ||
1207 | if (max_pages == 0) | 1207 | if (max_pages == 0) |
1208 | return 0; | 1208 | return 0; |
1209 | pagevec_init(&pvec, 0); | 1209 | pagevec_init(&pvec, 0); |
1210 | while (!done) { | 1210 | while (!done) { |
1211 | index = idx; | 1211 | index = idx; |
1212 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 1212 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
1213 | PAGECACHE_TAG_DIRTY, | 1213 | PAGECACHE_TAG_DIRTY, |
1214 | (pgoff_t)PAGEVEC_SIZE); | 1214 | (pgoff_t)PAGEVEC_SIZE); |
1215 | if (nr_pages == 0) | 1215 | if (nr_pages == 0) |
1216 | break; | 1216 | break; |
1217 | for (i = 0; i < nr_pages; i++) { | 1217 | for (i = 0; i < nr_pages; i++) { |
1218 | struct page *page = pvec.pages[i]; | 1218 | struct page *page = pvec.pages[i]; |
1219 | struct buffer_head *bh, *head; | 1219 | struct buffer_head *bh, *head; |
1220 | 1220 | ||
1221 | lock_page(page); | 1221 | lock_page(page); |
1222 | if (unlikely(page->mapping != mapping) || | 1222 | if (unlikely(page->mapping != mapping) || |
1223 | !PageDirty(page) || | 1223 | !PageDirty(page) || |
1224 | PageWriteback(page) || | 1224 | PageWriteback(page) || |
1225 | page->index != idx) { | 1225 | page->index != idx) { |
1226 | done = 1; | 1226 | done = 1; |
1227 | unlock_page(page); | 1227 | unlock_page(page); |
1228 | break; | 1228 | break; |
1229 | } | 1229 | } |
1230 | if (page_has_buffers(page)) { | 1230 | if (page_has_buffers(page)) { |
1231 | bh = head = page_buffers(page); | 1231 | bh = head = page_buffers(page); |
1232 | do { | 1232 | do { |
1233 | if (!buffer_delay(bh) && | 1233 | if (!buffer_delay(bh) && |
1234 | !buffer_unwritten(bh)) | 1234 | !buffer_unwritten(bh)) |
1235 | done = 1; | 1235 | done = 1; |
1236 | bh = bh->b_this_page; | 1236 | bh = bh->b_this_page; |
1237 | } while (!done && (bh != head)); | 1237 | } while (!done && (bh != head)); |
1238 | } | 1238 | } |
1239 | unlock_page(page); | 1239 | unlock_page(page); |
1240 | if (done) | 1240 | if (done) |
1241 | break; | 1241 | break; |
1242 | idx++; | 1242 | idx++; |
1243 | num++; | 1243 | num++; |
1244 | if (num >= max_pages) { | 1244 | if (num >= max_pages) { |
1245 | done = 1; | 1245 | done = 1; |
1246 | break; | 1246 | break; |
1247 | } | 1247 | } |
1248 | } | 1248 | } |
1249 | pagevec_release(&pvec); | 1249 | pagevec_release(&pvec); |
1250 | } | 1250 | } |
1251 | return num; | 1251 | return num; |
1252 | } | 1252 | } |
1253 | 1253 | ||
1254 | /* | 1254 | /* |
1255 | * The ext4_map_blocks() function tries to look up the requested blocks, | 1255 | * The ext4_map_blocks() function tries to look up the requested blocks, |
1256 | * and returns if the blocks are already mapped. | 1256 | * and returns if the blocks are already mapped. |
1257 | * | 1257 | * |
1258 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks | 1258 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks |
1259 | * and store the allocated blocks in the result buffer head and mark it | 1259 | * and store the allocated blocks in the result buffer head and mark it |
1260 | * mapped. | 1260 | * mapped. |
1261 | * | 1261 | * |
1262 | * If file type is extents based, it will call ext4_ext_map_blocks(), | 1262 | * If file type is extents based, it will call ext4_ext_map_blocks(), |
1263 | * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping | 1263 | * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping |
1264 | * based files | 1264 | * based files |
1265 | * | 1265 | * |
1266 | * On success, it returns the number of blocks being mapped or allocate. | 1266 | * On success, it returns the number of blocks being mapped or allocate. |
1267 | * if create==0 and the blocks are pre-allocated and uninitialized block, | 1267 | * if create==0 and the blocks are pre-allocated and uninitialized block, |
1268 | * the result buffer head is unmapped. If the create ==1, it will make sure | 1268 | * the result buffer head is unmapped. If the create ==1, it will make sure |
1269 | * the buffer head is mapped. | 1269 | * the buffer head is mapped. |
1270 | * | 1270 | * |
1271 | * It returns 0 if plain look up failed (blocks have not been allocated), in | 1271 | * It returns 0 if plain look up failed (blocks have not been allocated), in |
 1272 |  * that case, buffer head is unmapped | 1272 |  * that case, buffer head is unmapped |
1273 | * | 1273 | * |
1274 | * It returns the error in case of allocation failure. | 1274 | * It returns the error in case of allocation failure. |
1275 | */ | 1275 | */ |
1276 | int ext4_map_blocks(handle_t *handle, struct inode *inode, | 1276 | int ext4_map_blocks(handle_t *handle, struct inode *inode, |
1277 | struct ext4_map_blocks *map, int flags) | 1277 | struct ext4_map_blocks *map, int flags) |
1278 | { | 1278 | { |
1279 | int retval; | 1279 | int retval; |
1280 | 1280 | ||
1281 | map->m_flags = 0; | 1281 | map->m_flags = 0; |
1282 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," | 1282 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
1283 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | 1283 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
1284 | (unsigned long) map->m_lblk); | 1284 | (unsigned long) map->m_lblk); |
1285 | /* | 1285 | /* |
1286 | * Try to see if we can get the block without requesting a new | 1286 | * Try to see if we can get the block without requesting a new |
1287 | * file system block. | 1287 | * file system block. |
1288 | */ | 1288 | */ |
1289 | down_read((&EXT4_I(inode)->i_data_sem)); | 1289 | down_read((&EXT4_I(inode)->i_data_sem)); |
1290 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 1290 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1291 | retval = ext4_ext_map_blocks(handle, inode, map, 0); | 1291 | retval = ext4_ext_map_blocks(handle, inode, map, 0); |
1292 | } else { | 1292 | } else { |
1293 | retval = ext4_ind_map_blocks(handle, inode, map, 0); | 1293 | retval = ext4_ind_map_blocks(handle, inode, map, 0); |
1294 | } | 1294 | } |
1295 | up_read((&EXT4_I(inode)->i_data_sem)); | 1295 | up_read((&EXT4_I(inode)->i_data_sem)); |
1296 | 1296 | ||
1297 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1297 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1298 | int ret = check_block_validity(inode, map); | 1298 | int ret = check_block_validity(inode, map); |
1299 | if (ret != 0) | 1299 | if (ret != 0) |
1300 | return ret; | 1300 | return ret; |
1301 | } | 1301 | } |
1302 | 1302 | ||
1303 | /* If it is only a block(s) look up */ | 1303 | /* If it is only a block(s) look up */ |
1304 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) | 1304 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) |
1305 | return retval; | 1305 | return retval; |
1306 | 1306 | ||
1307 | /* | 1307 | /* |
1308 | * Returns if the blocks have already allocated | 1308 | * Returns if the blocks have already allocated |
1309 | * | 1309 | * |
1310 | * Note that if blocks have been preallocated | 1310 | * Note that if blocks have been preallocated |
1311 | * ext4_ext_get_block() returns th create = 0 | 1311 | * ext4_ext_get_block() returns th create = 0 |
1312 | * with buffer head unmapped. | 1312 | * with buffer head unmapped. |
1313 | */ | 1313 | */ |
1314 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 1314 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
1315 | return retval; | 1315 | return retval; |
1316 | 1316 | ||
1317 | /* | 1317 | /* |
1318 | * When we call get_blocks without the create flag, the | 1318 | * When we call get_blocks without the create flag, the |
1319 | * BH_Unwritten flag could have gotten set if the blocks | 1319 | * BH_Unwritten flag could have gotten set if the blocks |
1320 | * requested were part of a uninitialized extent. We need to | 1320 | * requested were part of a uninitialized extent. We need to |
1321 | * clear this flag now that we are committed to convert all or | 1321 | * clear this flag now that we are committed to convert all or |
1322 | * part of the uninitialized extent to be an initialized | 1322 | * part of the uninitialized extent to be an initialized |
1323 | * extent. This is because we need to avoid the combination | 1323 | * extent. This is because we need to avoid the combination |
1324 | * of BH_Unwritten and BH_Mapped flags being simultaneously | 1324 | * of BH_Unwritten and BH_Mapped flags being simultaneously |
1325 | * set on the buffer_head. | 1325 | * set on the buffer_head. |
1326 | */ | 1326 | */ |
1327 | map->m_flags &= ~EXT4_MAP_UNWRITTEN; | 1327 | map->m_flags &= ~EXT4_MAP_UNWRITTEN; |
1328 | 1328 | ||
1329 | /* | 1329 | /* |
1330 | * New blocks allocate and/or writing to uninitialized extent | 1330 | * New blocks allocate and/or writing to uninitialized extent |
1331 | * will possibly result in updating i_data, so we take | 1331 | * will possibly result in updating i_data, so we take |
1332 | * the write lock of i_data_sem, and call get_blocks() | 1332 | * the write lock of i_data_sem, and call get_blocks() |
1333 | * with create == 1 flag. | 1333 | * with create == 1 flag. |
1334 | */ | 1334 | */ |
1335 | down_write((&EXT4_I(inode)->i_data_sem)); | 1335 | down_write((&EXT4_I(inode)->i_data_sem)); |
1336 | 1336 | ||
1337 | /* | 1337 | /* |
1338 | * if the caller is from delayed allocation writeout path | 1338 | * if the caller is from delayed allocation writeout path |
1339 | * we have already reserved fs blocks for allocation | 1339 | * we have already reserved fs blocks for allocation |
1340 | * let the underlying get_block() function know to | 1340 | * let the underlying get_block() function know to |
1341 | * avoid double accounting | 1341 | * avoid double accounting |
1342 | */ | 1342 | */ |
1343 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1343 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1344 | ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 1344 | ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
1345 | /* | 1345 | /* |
1346 | * We need to check for EXT4 here because migrate | 1346 | * We need to check for EXT4 here because migrate |
1347 | * could have changed the inode type in between | 1347 | * could have changed the inode type in between |
1348 | */ | 1348 | */ |
1349 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 1349 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1350 | retval = ext4_ext_map_blocks(handle, inode, map, flags); | 1350 | retval = ext4_ext_map_blocks(handle, inode, map, flags); |
1351 | } else { | 1351 | } else { |
1352 | retval = ext4_ind_map_blocks(handle, inode, map, flags); | 1352 | retval = ext4_ind_map_blocks(handle, inode, map, flags); |
1353 | 1353 | ||
1354 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { | 1354 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { |
1355 | /* | 1355 | /* |
1356 | * We allocated new blocks which will result in | 1356 | * We allocated new blocks which will result in |
1357 | * i_data's format changing. Force the migrate | 1357 | * i_data's format changing. Force the migrate |
1358 | * to fail by clearing migrate flags | 1358 | * to fail by clearing migrate flags |
1359 | */ | 1359 | */ |
1360 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); | 1360 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
1361 | } | 1361 | } |
1362 | 1362 | ||
1363 | /* | 1363 | /* |
1364 | * Update reserved blocks/metadata blocks after successful | 1364 | * Update reserved blocks/metadata blocks after successful |
1365 | * block allocation which had been deferred till now. We don't | 1365 | * block allocation which had been deferred till now. We don't |
1366 | * support fallocate for non extent files. So we can update | 1366 | * support fallocate for non extent files. So we can update |
1367 | * reserve space here. | 1367 | * reserve space here. |
1368 | */ | 1368 | */ |
1369 | if ((retval > 0) && | 1369 | if ((retval > 0) && |
1370 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 1370 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
1371 | ext4_da_update_reserve_space(inode, retval, 1); | 1371 | ext4_da_update_reserve_space(inode, retval, 1); |
1372 | } | 1372 | } |
1373 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1373 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1374 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 1374 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
1375 | 1375 | ||
1376 | up_write((&EXT4_I(inode)->i_data_sem)); | 1376 | up_write((&EXT4_I(inode)->i_data_sem)); |
1377 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1377 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1378 | int ret = check_block_validity(inode, map); | 1378 | int ret = check_block_validity(inode, map); |
1379 | if (ret != 0) | 1379 | if (ret != 0) |
1380 | return ret; | 1380 | return ret; |
1381 | } | 1381 | } |
1382 | return retval; | 1382 | return retval; |
1383 | } | 1383 | } |
1384 | 1384 | ||
1385 | /* Maximum number of blocks we map for direct IO at once. */ | 1385 | /* Maximum number of blocks we map for direct IO at once. */ |
1386 | #define DIO_MAX_BLOCKS 4096 | 1386 | #define DIO_MAX_BLOCKS 4096 |
1387 | 1387 | ||
1388 | static int _ext4_get_block(struct inode *inode, sector_t iblock, | 1388 | static int _ext4_get_block(struct inode *inode, sector_t iblock, |
1389 | struct buffer_head *bh, int flags) | 1389 | struct buffer_head *bh, int flags) |
1390 | { | 1390 | { |
1391 | handle_t *handle = ext4_journal_current_handle(); | 1391 | handle_t *handle = ext4_journal_current_handle(); |
1392 | struct ext4_map_blocks map; | 1392 | struct ext4_map_blocks map; |
1393 | int ret = 0, started = 0; | 1393 | int ret = 0, started = 0; |
1394 | int dio_credits; | 1394 | int dio_credits; |
1395 | 1395 | ||
1396 | map.m_lblk = iblock; | 1396 | map.m_lblk = iblock; |
1397 | map.m_len = bh->b_size >> inode->i_blkbits; | 1397 | map.m_len = bh->b_size >> inode->i_blkbits; |
1398 | 1398 | ||
1399 | if (flags && !handle) { | 1399 | if (flags && !handle) { |
1400 | /* Direct IO write... */ | 1400 | /* Direct IO write... */ |
1401 | if (map.m_len > DIO_MAX_BLOCKS) | 1401 | if (map.m_len > DIO_MAX_BLOCKS) |
1402 | map.m_len = DIO_MAX_BLOCKS; | 1402 | map.m_len = DIO_MAX_BLOCKS; |
1403 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); | 1403 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); |
1404 | handle = ext4_journal_start(inode, dio_credits); | 1404 | handle = ext4_journal_start(inode, dio_credits); |
1405 | if (IS_ERR(handle)) { | 1405 | if (IS_ERR(handle)) { |
1406 | ret = PTR_ERR(handle); | 1406 | ret = PTR_ERR(handle); |
1407 | return ret; | 1407 | return ret; |
1408 | } | 1408 | } |
1409 | started = 1; | 1409 | started = 1; |
1410 | } | 1410 | } |
1411 | 1411 | ||
1412 | ret = ext4_map_blocks(handle, inode, &map, flags); | 1412 | ret = ext4_map_blocks(handle, inode, &map, flags); |
1413 | if (ret > 0) { | 1413 | if (ret > 0) { |
1414 | map_bh(bh, inode->i_sb, map.m_pblk); | 1414 | map_bh(bh, inode->i_sb, map.m_pblk); |
1415 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1415 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
1416 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 1416 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
1417 | ret = 0; | 1417 | ret = 0; |
1418 | } | 1418 | } |
1419 | if (started) | 1419 | if (started) |
1420 | ext4_journal_stop(handle); | 1420 | ext4_journal_stop(handle); |
1421 | return ret; | 1421 | return ret; |
1422 | } | 1422 | } |
1423 | 1423 | ||
1424 | int ext4_get_block(struct inode *inode, sector_t iblock, | 1424 | int ext4_get_block(struct inode *inode, sector_t iblock, |
1425 | struct buffer_head *bh, int create) | 1425 | struct buffer_head *bh, int create) |
1426 | { | 1426 | { |
1427 | return _ext4_get_block(inode, iblock, bh, | 1427 | return _ext4_get_block(inode, iblock, bh, |
1428 | create ? EXT4_GET_BLOCKS_CREATE : 0); | 1428 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1429 | } | 1429 | } |
1430 | 1430 | ||
/*
 * `handle' can be NULL if create is zero
 *
 * Look up (or, if @create, allocate) the single block @block of @inode and
 * return its buffer_head.  On failure returns NULL and stores a negative
 * errno in *errp; a plain lookup miss returns NULL with *errp untouched.
 * The caller owns a reference on the returned bh (brelse() when done).
 */
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
				ext4_lblk_t block, int create, int *errp)
{
	struct ext4_map_blocks map;
	struct buffer_head *bh;
	int fatal = 0, err;

	J_ASSERT(handle != NULL || create == 0);

	map.m_lblk = block;
	map.m_len = 1;
	err = ext4_map_blocks(handle, inode, &map,
			      create ? EXT4_GET_BLOCKS_CREATE : 0);

	/* err == 0: lookup found nothing; err < 0: real error. */
	if (err < 0)
		*errp = err;
	if (err <= 0)
		return NULL;
	*errp = 0;

	bh = sb_getblk(inode->i_sb, map.m_pblk);
	if (!bh) {
		*errp = -EIO;
		return NULL;
	}
	if (map.m_flags & EXT4_MAP_NEW) {
		J_ASSERT(create != 0);
		J_ASSERT(handle != NULL);

		/*
		 * Now that we do not always journal data, we should
		 * keep in mind whether this should always journal the
		 * new buffer as metadata. For now, regular file
		 * writes use ext4_get_block instead, so it's not a
		 * problem.
		 */
		lock_buffer(bh);
		BUFFER_TRACE(bh, "call get_create_access");
		fatal = ext4_journal_get_create_access(handle, bh);
		if (!fatal && !buffer_uptodate(bh)) {
			/* Newly allocated block: zero it so stale disk
			 * contents are never exposed. */
			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
			set_buffer_uptodate(bh);
		}
		unlock_buffer(bh);
		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, inode, bh);
		/* Preserve the first error seen. */
		if (!fatal)
			fatal = err;
	} else {
		BUFFER_TRACE(bh, "not a new buffer");
	}
	if (fatal) {
		*errp = fatal;
		brelse(bh);
		bh = NULL;
	}
	return bh;
}
1492 | 1492 | ||
1493 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1493 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
1494 | ext4_lblk_t block, int create, int *err) | 1494 | ext4_lblk_t block, int create, int *err) |
1495 | { | 1495 | { |
1496 | struct buffer_head *bh; | 1496 | struct buffer_head *bh; |
1497 | 1497 | ||
1498 | bh = ext4_getblk(handle, inode, block, create, err); | 1498 | bh = ext4_getblk(handle, inode, block, create, err); |
1499 | if (!bh) | 1499 | if (!bh) |
1500 | return bh; | 1500 | return bh; |
1501 | if (buffer_uptodate(bh)) | 1501 | if (buffer_uptodate(bh)) |
1502 | return bh; | 1502 | return bh; |
1503 | ll_rw_block(READ_META, 1, &bh); | 1503 | ll_rw_block(READ_META, 1, &bh); |
1504 | wait_on_buffer(bh); | 1504 | wait_on_buffer(bh); |
1505 | if (buffer_uptodate(bh)) | 1505 | if (buffer_uptodate(bh)) |
1506 | return bh; | 1506 | return bh; |
1507 | put_bh(bh); | 1507 | put_bh(bh); |
1508 | *err = -EIO; | 1508 | *err = -EIO; |
1509 | return NULL; | 1509 | return NULL; |
1510 | } | 1510 | } |
1511 | 1511 | ||
/*
 * Apply @fn to every buffer of the page that overlaps the byte range
 * [from, to).  Buffers entirely outside the range are skipped; if @partial
 * is non-NULL it is set to 1 when any skipped buffer is not uptodate.
 * Returns the first non-zero value returned by @fn (iteration stops on
 * the following buffer), or 0.
 */
static int walk_page_buffers(handle_t *handle,
			     struct buffer_head *head,
			     unsigned from,
			     unsigned to,
			     int *partial,
			     int (*fn)(handle_t *handle,
				       struct buffer_head *bh))
{
	struct buffer_head *bh;
	unsigned block_start, block_end;
	unsigned blocksize = head->b_size;
	int err, ret = 0;
	struct buffer_head *next;

	/*
	 * Page buffers form a circular list; "bh != head || !block_start"
	 * lets the first iteration run and stops when we wrap back to head.
	 */
	for (bh = head, block_start = 0;
	     ret == 0 && (bh != head || !block_start);
	     block_start = block_end, bh = next) {
		next = bh->b_this_page;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (partial && !buffer_uptodate(bh))
				*partial = 1;
			continue;
		}
		err = (*fn)(handle, bh);
		if (!ret)
			ret = err;
	}
	return ret;
}
1542 | 1542 | ||
1543 | /* | 1543 | /* |
1544 | * To preserve ordering, it is essential that the hole instantiation and | 1544 | * To preserve ordering, it is essential that the hole instantiation and |
1545 | * the data write be encapsulated in a single transaction. We cannot | 1545 | * the data write be encapsulated in a single transaction. We cannot |
1546 | * close off a transaction and start a new one between the ext4_get_block() | 1546 | * close off a transaction and start a new one between the ext4_get_block() |
1547 | * and the commit_write(). So doing the jbd2_journal_start at the start of | 1547 | * and the commit_write(). So doing the jbd2_journal_start at the start of |
1548 | * prepare_write() is the right place. | 1548 | * prepare_write() is the right place. |
1549 | * | 1549 | * |
1550 | * Also, this function can nest inside ext4_writepage() -> | 1550 | * Also, this function can nest inside ext4_writepage() -> |
1551 | * block_write_full_page(). In that case, we *know* that ext4_writepage() | 1551 | * block_write_full_page(). In that case, we *know* that ext4_writepage() |
1552 | * has generated enough buffer credits to do the whole page. So we won't | 1552 | * has generated enough buffer credits to do the whole page. So we won't |
1553 | * block on the journal in that case, which is good, because the caller may | 1553 | * block on the journal in that case, which is good, because the caller may |
1554 | * be PF_MEMALLOC. | 1554 | * be PF_MEMALLOC. |
1555 | * | 1555 | * |
1556 | * By accident, ext4 can be reentered when a transaction is open via | 1556 | * By accident, ext4 can be reentered when a transaction is open via |
1557 | * quota file writes. If we were to commit the transaction while thus | 1557 | * quota file writes. If we were to commit the transaction while thus |
1558 | * reentered, there can be a deadlock - we would be holding a quota | 1558 | * reentered, there can be a deadlock - we would be holding a quota |
1559 | * lock, and the commit would never complete if another thread had a | 1559 | * lock, and the commit would never complete if another thread had a |
1560 | * transaction open and was blocking on the quota lock - a ranking | 1560 | * transaction open and was blocking on the quota lock - a ranking |
1561 | * violation. | 1561 | * violation. |
1562 | * | 1562 | * |
1563 | * So what we do is to rely on the fact that jbd2_journal_stop/journal_start | 1563 | * So what we do is to rely on the fact that jbd2_journal_stop/journal_start |
1564 | * will _not_ run commit under these circumstances because handle->h_ref | 1564 | * will _not_ run commit under these circumstances because handle->h_ref |
1565 | * is elevated. We'll still have enough credits for the tiny quotafile | 1565 | * is elevated. We'll still have enough credits for the tiny quotafile |
1566 | * write. | 1566 | * write. |
1567 | */ | 1567 | */ |
1568 | static int do_journal_get_write_access(handle_t *handle, | 1568 | static int do_journal_get_write_access(handle_t *handle, |
1569 | struct buffer_head *bh) | 1569 | struct buffer_head *bh) |
1570 | { | 1570 | { |
1571 | int dirty = buffer_dirty(bh); | 1571 | int dirty = buffer_dirty(bh); |
1572 | int ret; | 1572 | int ret; |
1573 | 1573 | ||
1574 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1574 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1575 | return 0; | 1575 | return 0; |
1576 | /* | 1576 | /* |
1577 | * __block_write_begin() could have dirtied some buffers. Clean | 1577 | * __block_write_begin() could have dirtied some buffers. Clean |
1578 | * the dirty bit as jbd2_journal_get_write_access() could complain | 1578 | * the dirty bit as jbd2_journal_get_write_access() could complain |
1579 | * otherwise about fs integrity issues. Setting of the dirty bit | 1579 | * otherwise about fs integrity issues. Setting of the dirty bit |
1580 | * by __block_write_begin() isn't a real problem here as we clear | 1580 | * by __block_write_begin() isn't a real problem here as we clear |
1581 | * the bit before releasing a page lock and thus writeback cannot | 1581 | * the bit before releasing a page lock and thus writeback cannot |
1582 | * ever write the buffer. | 1582 | * ever write the buffer. |
1583 | */ | 1583 | */ |
1584 | if (dirty) | 1584 | if (dirty) |
1585 | clear_buffer_dirty(bh); | 1585 | clear_buffer_dirty(bh); |
1586 | ret = ext4_journal_get_write_access(handle, bh); | 1586 | ret = ext4_journal_get_write_access(handle, bh); |
1587 | if (!ret && dirty) | 1587 | if (!ret && dirty) |
1588 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | 1588 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); |
1589 | return ret; | 1589 | return ret; |
1590 | } | 1590 | } |
1591 | 1591 | ||
1592 | /* | 1592 | /* |
1593 | * Truncate blocks that were not used by write. We have to truncate the | 1593 | * Truncate blocks that were not used by write. We have to truncate the |
1594 | * pagecache as well so that corresponding buffers get properly unmapped. | 1594 | * pagecache as well so that corresponding buffers get properly unmapped. |
1595 | */ | 1595 | */ |
1596 | static void ext4_truncate_failed_write(struct inode *inode) | 1596 | static void ext4_truncate_failed_write(struct inode *inode) |
1597 | { | 1597 | { |
1598 | truncate_inode_pages(inode->i_mapping, inode->i_size); | 1598 | truncate_inode_pages(inode->i_mapping, inode->i_size); |
1599 | ext4_truncate(inode); | 1599 | ext4_truncate(inode); |
1600 | } | 1600 | } |
1601 | 1601 | ||
static int ext4_get_block_write(struct inode *inode, sector_t iblock,
				struct buffer_head *bh_result, int create);
/*
 * ->write_begin for all non-delalloc modes: start a transaction sized for
 * one page's worth of block allocation (+1 for a possible orphan-list
 * addition), grab+lock the page and instantiate its blocks.  On failure
 * any blocks instantiated beyond i_size are truncated away, using the
 * orphan list to stay safe across a crash.  Retries on ENOSPC.
 */
static int ext4_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	int ret, needed_blocks;
	handle_t *handle;
	int retries = 0;
	struct page *page;
	pgoff_t index;
	unsigned from, to;

	trace_ext4_write_begin(inode, pos, len, flags);
	/*
	 * Reserve one block more for addition to orphan list in case
	 * we allocate blocks but write fails for some reason
	 */
	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

retry:
	handle = ext4_journal_start(inode, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	flags |= AOP_FLAG_NOFS;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		ext4_journal_stop(handle);
		ret = -ENOMEM;
		goto out;
	}
	*pagep = page;

	if (ext4_should_dioread_nolock(inode))
		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
	else
		ret = __block_write_begin(page, pos, len, ext4_get_block);

	/* data=journal: every touched buffer needs journal write access. */
	if (!ret && ext4_should_journal_data(inode)) {
		ret = walk_page_buffers(handle, page_buffers(page),
				from, to, NULL, do_journal_get_write_access);
	}

	if (ret) {
		unlock_page(page);
		page_cache_release(page);
		/*
		 * __block_write_begin may have instantiated a few blocks
		 * outside i_size. Trim these off again. Don't need
		 * i_size_read because we hold i_mutex.
		 *
		 * Add inode to orphan list in case we crash before
		 * truncate finishes
		 */
		if (pos + len > inode->i_size && ext4_can_truncate(inode))
			ext4_orphan_add(handle, inode);

		ext4_journal_stop(handle);
		if (pos + len > inode->i_size) {
			ext4_truncate_failed_write(inode);
			/*
			 * If truncate failed early the inode might
			 * still be on the orphan list; we need to
			 * make sure the inode is removed from the
			 * orphan list in that case.
			 */
			if (inode->i_nlink)
				ext4_orphan_del(NULL, inode);
		}
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
out:
	return ret;
}
1688 | 1688 | ||
1689 | /* For write_end() in data=journal mode */ | 1689 | /* For write_end() in data=journal mode */ |
1690 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) | 1690 | static int write_end_fn(handle_t *handle, struct buffer_head *bh) |
1691 | { | 1691 | { |
1692 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1692 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1693 | return 0; | 1693 | return 0; |
1694 | set_buffer_uptodate(bh); | 1694 | set_buffer_uptodate(bh); |
1695 | return ext4_handle_dirty_metadata(handle, NULL, bh); | 1695 | return ext4_handle_dirty_metadata(handle, NULL, bh); |
1696 | } | 1696 | } |
1697 | 1697 | ||
/*
 * Common tail of the ->write_end paths: commit the copied data via
 * block_write_end(), then update i_size/i_disksize while the page is
 * still locked, and finally mark the inode dirty (outside the page lock).
 * Returns the number of bytes actually copied.
 */
static int ext4_generic_write_end(struct file *file,
				  struct address_space *mapping,
				  loff_t pos, unsigned len, unsigned copied,
				  struct page *page, void *fsdata)
{
	int i_size_changed = 0;
	struct inode *inode = mapping->host;
	handle_t *handle = ext4_journal_current_handle();

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 *
	 * But it's important to update i_size while still holding page lock:
	 * page writeout could otherwise come in and zero beyond i_size.
	 */
	if (pos + copied > inode->i_size) {
		i_size_write(inode, pos + copied);
		i_size_changed = 1;
	}

	if (pos + copied > EXT4_I(inode)->i_disksize) {
		/* We need to mark inode dirty even if
		 * new_i_size is less that inode->i_size
		 * but greater than i_disksize. (hint delalloc)
		 */
		ext4_update_i_disksize(inode, (pos + copied));
		i_size_changed = 1;
	}
	unlock_page(page);
	page_cache_release(page);

	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
	 * ordering of page lock and transaction start for journaling
	 * filesystems.
	 */
	if (i_size_changed)
		ext4_mark_inode_dirty(handle, inode);

	return copied;
}
1743 | 1743 | ||
/*
 * We need to pick up the new inode size which generic_commit_write gave us
 * `file' can be NULL - eg, when called from page_symlink().
 *
 * ext4 never places buffers on inode->i_mapping->private_list.  metadata
 * buffers are managed internally.
 */
/*
 * write_end callback for data=ordered mode.  The data buffers are attached
 * to the running transaction via ext4_jbd2_file_inode() *before* i_size is
 * updated, so the journal commit flushes the data before the metadata that
 * points at it becomes durable.  Returns the number of bytes copied, or a
 * negative error.
 */
static int ext4_ordered_write_end(struct file *file,
				  struct address_space *mapping,
				  loff_t pos, unsigned len, unsigned copied,
				  struct page *page, void *fsdata)
{
	handle_t *handle = ext4_journal_current_handle();
	struct inode *inode = mapping->host;
	int ret = 0, ret2;

	trace_ext4_ordered_write_end(inode, pos, len, copied);
	/* File the inode's data on the transaction's ordered list first. */
	ret = ext4_jbd2_file_inode(handle, inode);

	if (ret == 0) {
		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
							page, fsdata);
		copied = ret2;
		if (pos + len > inode->i_size && ext4_can_truncate(inode))
			/* if we have allocated more blocks and copied
			 * less. We will have blocks allocated outside
			 * inode->i_size. So truncate them
			 */
			ext4_orphan_add(handle, inode);
		if (ret2 < 0)
			ret = ret2;
	}
	/* Journal stop errors must not mask an earlier failure. */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;

	if (pos + len > inode->i_size) {
		/* Trim the blocks allocated beyond i_size by the short copy. */
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}


	return ret ? ret : copied;
}
1794 | 1794 | ||
/*
 * write_end callback for data=writeback mode.  Identical to the ordered
 * variant except that no data buffers are filed on the journal's ordered
 * list — data may hit disk after the metadata commit.  Returns bytes
 * copied, or a negative error.
 */
static int ext4_writeback_write_end(struct file *file,
				    struct address_space *mapping,
				    loff_t pos, unsigned len, unsigned copied,
				    struct page *page, void *fsdata)
{
	handle_t *handle = ext4_journal_current_handle();
	struct inode *inode = mapping->host;
	int ret = 0, ret2;

	trace_ext4_writeback_write_end(inode, pos, len, copied);
	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
							page, fsdata);
	copied = ret2;
	if (pos + len > inode->i_size && ext4_can_truncate(inode))
		/* if we have allocated more blocks and copied
		 * less. We will have blocks allocated outside
		 * inode->i_size. So truncate them
		 */
		ext4_orphan_add(handle, inode);

	if (ret2 < 0)
		ret = ret2;

	/* Stop the handle; do not let its error mask an earlier one. */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;

	if (pos + len > inode->i_size) {
		/* Trim blocks allocated beyond i_size by the short copy. */
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	return ret ? ret : copied;
}
1835 | 1835 | ||
/*
 * write_end callback for data=journal mode.  Each touched buffer is handed
 * to the journal via write_end_fn (through walk_page_buffers) instead of
 * being marked dirty for writeback, and the inode is tagged with
 * EXT4_STATE_JDATA so later writeback knows the data lives in the journal.
 * Returns bytes copied, or a negative error.
 */
static int ext4_journalled_write_end(struct file *file,
				     struct address_space *mapping,
				     loff_t pos, unsigned len, unsigned copied,
				     struct page *page, void *fsdata)
{
	handle_t *handle = ext4_journal_current_handle();
	struct inode *inode = mapping->host;
	int ret = 0, ret2;
	int partial = 0;		/* set if some buffers were not journalled */
	unsigned from, to;		/* byte range touched within the page */
	loff_t new_i_size;

	trace_ext4_journalled_write_end(inode, pos, len, copied);
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;

	if (copied < len) {
		/*
		 * Short copy: if the page was never fully read, pretend
		 * nothing was copied and zero the new buffers so no stale
		 * data is exposed.
		 */
		if (!PageUptodate(page))
			copied = 0;
		page_zero_new_buffers(page, from+copied, to);
	}

	ret = walk_page_buffers(handle, page_buffers(page), from,
				to, &partial, write_end_fn);
	if (!partial)
		SetPageUptodate(page);
	new_i_size = pos + copied;
	if (new_i_size > inode->i_size)
		i_size_write(inode, pos+copied);
	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	if (new_i_size > EXT4_I(inode)->i_disksize) {
		ext4_update_i_disksize(inode, new_i_size);
		ret2 = ext4_mark_inode_dirty(handle, inode);
		if (!ret)
			ret = ret2;
	}

	unlock_page(page);
	page_cache_release(page);
	if (pos + len > inode->i_size && ext4_can_truncate(inode))
		/* if we have allocated more blocks and copied
		 * less. We will have blocks allocated outside
		 * inode->i_size. So truncate them
		 */
		ext4_orphan_add(handle, inode);

	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	if (pos + len > inode->i_size) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	return ret ? ret : copied;
}
1898 | 1898 | ||
/*
 * Reserve a single block located at lblock
 */
/*
 * Reserve space for one delayed-allocation data block plus the worst-case
 * metadata needed to map it.  Data is charged to quota here; metadata quota
 * is deferred to writeout.  Returns 0 on success, -ENOSPC when the
 * filesystem is out of blocks (after retrying), or a quota error.
 */
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
	int retries = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned long md_needed;
	int ret;

	/*
	 * recalculate the amount of metadata blocks to reserve
	 * in order to allocate nrblocks
	 * worse case is one extent per block
	 */
repeat:
	spin_lock(&ei->i_block_reservation_lock);
	md_needed = ext4_calc_metadata_amount(inode, lblock);
	trace_ext4_da_reserve_space(inode, md_needed);
	/*
	 * NOTE(review): the lock is dropped here and retaken below to
	 * commit the counters; md_needed may be stale by then — confirm
	 * this window is tolerated by the reservation accounting.
	 */
	spin_unlock(&ei->i_block_reservation_lock);

	/*
	 * We will charge metadata quota at writeout time; this saves
	 * us from metadata over-estimation, though we may go over by
	 * a small amount in the end.  Here we just reserve for data.
	 */
	ret = dquot_reserve_block(inode, 1);
	if (ret)
		return ret;
	/*
	 * We do still charge estimated metadata to the sb though;
	 * we cannot afford to run out of free blocks.
	 */
	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
		/* Claim failed: back out the quota charge before retrying. */
		dquot_release_reservation_block(inode, 1);
		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
			yield();
			goto repeat;
		}
		return -ENOSPC;
	}
	spin_lock(&ei->i_block_reservation_lock);
	ei->i_reserved_data_blocks++;
	ei->i_reserved_meta_blocks += md_needed;
	spin_unlock(&ei->i_block_reservation_lock);

	return 0;       /* success */
}
1948 | 1948 | ||
/*
 * Release @to_free previously reserved delayed-allocation data blocks,
 * returning them to the per-inode counters, the sb dirty-blocks counter
 * and quota.  When the last reserved data block goes, the estimated
 * metadata reservation is released as well.
 */
static void ext4_da_release_space(struct inode *inode, int to_free)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!to_free)
		return;		/* Nothing to release, exit */

	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);

	trace_ext4_da_release_space(inode, to_free);
	if (unlikely(to_free > ei->i_reserved_data_blocks)) {
		/*
		 * if there aren't enough reserved blocks, then the
		 * counter is messed up somewhere.  Since this
		 * function is called from invalidate page, it's
		 * harmless to return without any action.
		 */
		ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
			 "ino %lu, to_free %d with only %d reserved "
			 "data blocks\n", inode->i_ino, to_free,
			 ei->i_reserved_data_blocks);
		WARN_ON(1);
		/* Clamp so the counter never underflows below. */
		to_free = ei->i_reserved_data_blocks;
	}
	ei->i_reserved_data_blocks -= to_free;

	if (ei->i_reserved_data_blocks == 0) {
		/*
		 * We can release all of the reserved metadata blocks
		 * only when we have written all of the delayed
		 * allocation blocks.
		 */
		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
				   ei->i_reserved_meta_blocks);
		ei->i_reserved_meta_blocks = 0;
		ei->i_da_metadata_calc_len = 0;
	}

	/* update fs dirty data blocks counter */
	percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);

	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);

	/* Quota release must happen outside the spinlock. */
	dquot_release_reservation_block(inode, to_free);
}
1995 | 1995 | ||
1996 | static void ext4_da_page_release_reservation(struct page *page, | 1996 | static void ext4_da_page_release_reservation(struct page *page, |
1997 | unsigned long offset) | 1997 | unsigned long offset) |
1998 | { | 1998 | { |
1999 | int to_release = 0; | 1999 | int to_release = 0; |
2000 | struct buffer_head *head, *bh; | 2000 | struct buffer_head *head, *bh; |
2001 | unsigned int curr_off = 0; | 2001 | unsigned int curr_off = 0; |
2002 | 2002 | ||
2003 | head = page_buffers(page); | 2003 | head = page_buffers(page); |
2004 | bh = head; | 2004 | bh = head; |
2005 | do { | 2005 | do { |
2006 | unsigned int next_off = curr_off + bh->b_size; | 2006 | unsigned int next_off = curr_off + bh->b_size; |
2007 | 2007 | ||
2008 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 2008 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
2009 | to_release++; | 2009 | to_release++; |
2010 | clear_buffer_delay(bh); | 2010 | clear_buffer_delay(bh); |
2011 | } | 2011 | } |
2012 | curr_off = next_off; | 2012 | curr_off = next_off; |
2013 | } while ((bh = bh->b_this_page) != head); | 2013 | } while ((bh = bh->b_this_page) != head); |
2014 | ext4_da_release_space(page->mapping->host, to_release); | 2014 | ext4_da_release_space(page->mapping->host, to_release); |
2015 | } | 2015 | } |
2016 | 2016 | ||
2017 | /* | 2017 | /* |
2018 | * Delayed allocation stuff | 2018 | * Delayed allocation stuff |
2019 | */ | 2019 | */ |
2020 | 2020 | ||
/*
 * mpage_da_submit_io - walks through extent of pages and try to write
 * them with writepage() call back
 *
 * @mpd->inode: inode
 * @mpd->first_page: first page of the extent
 * @mpd->next_page: page after the last page of the extent
 *
 * By the time mpage_da_submit_io() is called we expect all blocks
 * to be allocated. this may be wrong if allocation failed.
 *
 * As pages are already locked by write_cache_pages(), we can't use it
 *
 * Returns 0 on success, or the first writepage error encountered (later
 * pages are still processed because they remain locked).
 */
static int mpage_da_submit_io(struct mpage_da_data *mpd,
			      struct ext4_map_blocks *map)
{
	struct pagevec pvec;
	unsigned long index, end;
	int ret = 0, err, nr_pages, i;
	struct inode *inode = mpd->inode;
	struct address_space *mapping = inode->i_mapping;
	loff_t size = i_size_read(inode);
	unsigned int len, block_start;
	struct buffer_head *bh, *page_bufs = NULL;
	int journal_data = ext4_should_journal_data(inode);
	sector_t pblock = 0, cur_logical = 0;
	struct ext4_io_submit io_submit;

	BUG_ON(mpd->next_page <= mpd->first_page);
	memset(&io_submit, 0, sizeof(io_submit));
	/*
	 * We need to start from the first_page to the next_page - 1
	 * to make sure we also write the mapped dirty buffer_heads.
	 * If we look at mpd->b_blocknr we would only be looking
	 * at the currently mapped buffer_heads.
	 */
	index = mpd->first_page;
	end = mpd->next_page - 1;

	pagevec_init(&pvec, 0);
	while (index <= end) {
		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
		if (nr_pages == 0)
			break;
		for (i = 0; i < nr_pages; i++) {
			int commit_write = 0, skip_page = 0;
			struct page *page = pvec.pages[i];

			index = page->index;
			if (index > end)
				break;

			/* Last page of the file may be partial. */
			if (index == size >> PAGE_CACHE_SHIFT)
				len = size & ~PAGE_CACHE_MASK;
			else
				len = PAGE_CACHE_SIZE;
			if (map) {
				/* Physical block backing this page's first block. */
				cur_logical = index << (PAGE_CACHE_SHIFT -
							inode->i_blkbits);
				pblock = map->m_pblk + (cur_logical -
							map->m_lblk);
			}
			index++;

			BUG_ON(!PageLocked(page));
			BUG_ON(PageWriteback(page));

			/*
			 * If the page does not have buffers (for
			 * whatever reason), try to create them using
			 * __block_write_begin.  If this fails,
			 * skip the page and move on.
			 */
			if (!page_has_buffers(page)) {
				if (__block_write_begin(page, 0, len,
						noalloc_get_block_write)) {
				/* Shared bail-out: unlock and move to next page. */
				skip_page:
					unlock_page(page);
					continue;
				}
				commit_write = 1;
			}

			bh = page_bufs = page_buffers(page);
			block_start = 0;
			do {
				if (!bh)	/* defensive: broken buffer ring */
					goto skip_page;
				if (map && (cur_logical >= map->m_lblk) &&
				    (cur_logical <= (map->m_lblk +
						     (map->m_len - 1)))) {
					/* Buffer falls inside the mapped extent:
					 * stamp the newly allocated block number. */
					if (buffer_delay(bh)) {
						clear_buffer_delay(bh);
						bh->b_blocknr = pblock;
					}
					if (buffer_unwritten(bh) ||
					    buffer_mapped(bh))
						BUG_ON(bh->b_blocknr != pblock);
					if (map->m_flags & EXT4_MAP_UNINIT)
						set_buffer_uninit(bh);
					clear_buffer_unwritten(bh);
				}

				/* skip page if block allocation undone */
				if (buffer_delay(bh) || buffer_unwritten(bh))
					skip_page = 1;
				bh = bh->b_this_page;
				/*
				 * NOTE(review): bh was advanced above, so this
				 * adds the *next* buffer's size; harmless as
				 * block_start is never read afterwards.
				 */
				block_start += bh->b_size;
				cur_logical++;
				pblock++;
			} while (bh != page_bufs);

			if (skip_page)
				goto skip_page;

			if (commit_write)
				/* mark the buffer_heads as dirty & uptodate */
				block_commit_write(page, 0, len);

			clear_page_dirty_for_io(page);
			/*
			 * Delalloc doesn't support data journalling,
			 * but eventually maybe we'll lift this
			 * restriction.
			 */
			if (unlikely(journal_data && PageChecked(page)))
				err = __ext4_journalled_writepage(page, len);
			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
				err = ext4_bio_write_page(&io_submit, page,
							  len, mpd->wbc);
			else
				err = block_write_full_page(page,
					noalloc_get_block_write, mpd->wbc);

			if (!err)
				mpd->pages_written++;
			/*
			 * In error case, we have to continue because
			 * remaining pages are still locked
			 */
			if (ret == 0)
				ret = err;
		}
		pagevec_release(&pvec);
	}
	/* Flush any bio accumulated by ext4_bio_write_page(). */
	ext4_io_submit(&io_submit);
	return ret;
}
2169 | 2169 | ||
2170 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | 2170 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) |
2171 | { | 2171 | { |
2172 | int nr_pages, i; | 2172 | int nr_pages, i; |
2173 | pgoff_t index, end; | 2173 | pgoff_t index, end; |
2174 | struct pagevec pvec; | 2174 | struct pagevec pvec; |
2175 | struct inode *inode = mpd->inode; | 2175 | struct inode *inode = mpd->inode; |
2176 | struct address_space *mapping = inode->i_mapping; | 2176 | struct address_space *mapping = inode->i_mapping; |
2177 | 2177 | ||
2178 | index = mpd->first_page; | 2178 | index = mpd->first_page; |
2179 | end = mpd->next_page - 1; | 2179 | end = mpd->next_page - 1; |
2180 | while (index <= end) { | 2180 | while (index <= end) { |
2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
2182 | if (nr_pages == 0) | 2182 | if (nr_pages == 0) |
2183 | break; | 2183 | break; |
2184 | for (i = 0; i < nr_pages; i++) { | 2184 | for (i = 0; i < nr_pages; i++) { |
2185 | struct page *page = pvec.pages[i]; | 2185 | struct page *page = pvec.pages[i]; |
2186 | if (page->index > end) | 2186 | if (page->index > end) |
2187 | break; | 2187 | break; |
2188 | BUG_ON(!PageLocked(page)); | 2188 | BUG_ON(!PageLocked(page)); |
2189 | BUG_ON(PageWriteback(page)); | 2189 | BUG_ON(PageWriteback(page)); |
2190 | block_invalidatepage(page, 0); | 2190 | block_invalidatepage(page, 0); |
2191 | ClearPageUptodate(page); | 2191 | ClearPageUptodate(page); |
2192 | unlock_page(page); | 2192 | unlock_page(page); |
2193 | } | 2193 | } |
2194 | index = pvec.pages[nr_pages - 1]->index + 1; | 2194 | index = pvec.pages[nr_pages - 1]->index + 1; |
2195 | pagevec_release(&pvec); | 2195 | pagevec_release(&pvec); |
2196 | } | 2196 | } |
2197 | return; | 2197 | return; |
2198 | } | 2198 | } |
2199 | 2199 | ||
2200 | static void ext4_print_free_blocks(struct inode *inode) | 2200 | static void ext4_print_free_blocks(struct inode *inode) |
2201 | { | 2201 | { |
2202 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2202 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
2203 | printk(KERN_CRIT "Total free blocks count %lld\n", | 2203 | printk(KERN_CRIT "Total free blocks count %lld\n", |
2204 | ext4_count_free_blocks(inode->i_sb)); | 2204 | ext4_count_free_blocks(inode->i_sb)); |
2205 | printk(KERN_CRIT "Free/Dirty block details\n"); | 2205 | printk(KERN_CRIT "Free/Dirty block details\n"); |
2206 | printk(KERN_CRIT "free_blocks=%lld\n", | 2206 | printk(KERN_CRIT "free_blocks=%lld\n", |
2207 | (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); | 2207 | (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); |
2208 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 2208 | printk(KERN_CRIT "dirty_blocks=%lld\n", |
2209 | (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 2209 | (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); |
2210 | printk(KERN_CRIT "Block reservation details\n"); | 2210 | printk(KERN_CRIT "Block reservation details\n"); |
2211 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 2211 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", |
2212 | EXT4_I(inode)->i_reserved_data_blocks); | 2212 | EXT4_I(inode)->i_reserved_data_blocks); |
2213 | printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", | 2213 | printk(KERN_CRIT "i_reserved_meta_blocks=%u\n", |
2214 | EXT4_I(inode)->i_reserved_meta_blocks); | 2214 | EXT4_I(inode)->i_reserved_meta_blocks); |
2215 | return; | 2215 | return; |
2216 | } | 2216 | } |
2217 | 2217 | ||
2218 | /* | 2218 | /* |
2219 | * mpage_da_map_and_submit - go through given space, map them | 2219 | * mpage_da_map_and_submit - go through given space, map them |
2220 | * if necessary, and then submit them for I/O | 2220 | * if necessary, and then submit them for I/O |
2221 | * | 2221 | * |
2222 | * @mpd - bh describing space | 2222 | * @mpd - bh describing space |
2223 | * | 2223 | * |
2224 | * The function skips space we know is already mapped to disk blocks. | 2224 | * The function skips space we know is already mapped to disk blocks. |
2225 | * | 2225 | * |
2226 | */ | 2226 | */ |
2227 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | 2227 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2228 | { | 2228 | { |
2229 | int err, blks, get_blocks_flags; | 2229 | int err, blks, get_blocks_flags; |
2230 | struct ext4_map_blocks map, *mapp = NULL; | 2230 | struct ext4_map_blocks map, *mapp = NULL; |
2231 | sector_t next = mpd->b_blocknr; | 2231 | sector_t next = mpd->b_blocknr; |
2232 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2232 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2233 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2233 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2234 | handle_t *handle = NULL; | 2234 | handle_t *handle = NULL; |
2235 | 2235 | ||
2236 | /* | 2236 | /* |
2237 | * If the blocks are mapped already, or we couldn't accumulate | 2237 | * If the blocks are mapped already, or we couldn't accumulate |
2238 | * any blocks, then proceed immediately to the submission stage. | 2238 | * any blocks, then proceed immediately to the submission stage. |
2239 | */ | 2239 | */ |
2240 | if ((mpd->b_size == 0) || | 2240 | if ((mpd->b_size == 0) || |
2241 | ((mpd->b_state & (1 << BH_Mapped)) && | 2241 | ((mpd->b_state & (1 << BH_Mapped)) && |
2242 | !(mpd->b_state & (1 << BH_Delay)) && | 2242 | !(mpd->b_state & (1 << BH_Delay)) && |
2243 | !(mpd->b_state & (1 << BH_Unwritten)))) | 2243 | !(mpd->b_state & (1 << BH_Unwritten)))) |
2244 | goto submit_io; | 2244 | goto submit_io; |
2245 | 2245 | ||
2246 | handle = ext4_journal_current_handle(); | 2246 | handle = ext4_journal_current_handle(); |
2247 | BUG_ON(!handle); | 2247 | BUG_ON(!handle); |
2248 | 2248 | ||
2249 | /* | 2249 | /* |
2250 | * Call ext4_map_blocks() to allocate any delayed allocation | 2250 | * Call ext4_map_blocks() to allocate any delayed allocation |
2251 | * blocks, or to convert an uninitialized extent to be | 2251 | * blocks, or to convert an uninitialized extent to be |
2252 | * initialized (in the case where we have written into | 2252 | * initialized (in the case where we have written into |
2253 | * one or more preallocated blocks). | 2253 | * one or more preallocated blocks). |
2254 | * | 2254 | * |
2255 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | 2255 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to |
2256 | * indicate that we are on the delayed allocation path. This | 2256 | * indicate that we are on the delayed allocation path. This |
2257 | * affects functions in many different parts of the allocation | 2257 | * affects functions in many different parts of the allocation |
2258 | * call path. This flag exists primarily because we don't | 2258 | * call path. This flag exists primarily because we don't |
2259 | * want to change *many* call functions, so ext4_map_blocks() | 2259 | * want to change *many* call functions, so ext4_map_blocks() |
2260 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the | 2260 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the |
2261 | * inode's allocation semaphore is taken. | 2261 | * inode's allocation semaphore is taken. |
2262 | * | 2262 | * |
2263 | * If the blocks in questions were delalloc blocks, set | 2263 | * If the blocks in questions were delalloc blocks, set |
2264 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | 2264 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting |
2265 | * variables are updated after the blocks have been allocated. | 2265 | * variables are updated after the blocks have been allocated. |
2266 | */ | 2266 | */ |
2267 | map.m_lblk = next; | 2267 | map.m_lblk = next; |
2268 | map.m_len = max_blocks; | 2268 | map.m_len = max_blocks; |
2269 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 2269 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2270 | if (ext4_should_dioread_nolock(mpd->inode)) | 2270 | if (ext4_should_dioread_nolock(mpd->inode)) |
2271 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | 2271 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; |
2272 | if (mpd->b_state & (1 << BH_Delay)) | 2272 | if (mpd->b_state & (1 << BH_Delay)) |
2273 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2273 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2274 | 2274 | ||
2275 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | 2275 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2276 | if (blks < 0) { | 2276 | if (blks < 0) { |
2277 | struct super_block *sb = mpd->inode->i_sb; | 2277 | struct super_block *sb = mpd->inode->i_sb; |
2278 | 2278 | ||
2279 | err = blks; | 2279 | err = blks; |
2280 | /* | 2280 | /* |
2281 | * If get block returns EAGAIN or ENOSPC and there | 2281 | * If get block returns EAGAIN or ENOSPC and there |
2282 | * appears to be free blocks we will just let | 2282 | * appears to be free blocks we will just let |
2283 | * mpage_da_submit_io() unlock all of the pages. | 2283 | * mpage_da_submit_io() unlock all of the pages. |
2284 | */ | 2284 | */ |
2285 | if (err == -EAGAIN) | 2285 | if (err == -EAGAIN) |
2286 | goto submit_io; | 2286 | goto submit_io; |
2287 | 2287 | ||
2288 | if (err == -ENOSPC && | 2288 | if (err == -ENOSPC && |
2289 | ext4_count_free_blocks(sb)) { | 2289 | ext4_count_free_blocks(sb)) { |
2290 | mpd->retval = err; | 2290 | mpd->retval = err; |
2291 | goto submit_io; | 2291 | goto submit_io; |
2292 | } | 2292 | } |
2293 | 2293 | ||
2294 | /* | 2294 | /* |
2295 | * get block failure will cause us to loop in | 2295 | * get block failure will cause us to loop in |
2296 | * writepages, because a_ops->writepage won't be able | 2296 | * writepages, because a_ops->writepage won't be able |
2297 | * to make progress. The page will be redirtied by | 2297 | * to make progress. The page will be redirtied by |
2298 | * writepage and writepages will again try to write | 2298 | * writepage and writepages will again try to write |
2299 | * the same. | 2299 | * the same. |
2300 | */ | 2300 | */ |
2301 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | 2301 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { |
2302 | ext4_msg(sb, KERN_CRIT, | 2302 | ext4_msg(sb, KERN_CRIT, |
2303 | "delayed block allocation failed for inode %lu " | 2303 | "delayed block allocation failed for inode %lu " |
2304 | "at logical offset %llu with max blocks %zd " | 2304 | "at logical offset %llu with max blocks %zd " |
2305 | "with error %d", mpd->inode->i_ino, | 2305 | "with error %d", mpd->inode->i_ino, |
2306 | (unsigned long long) next, | 2306 | (unsigned long long) next, |
2307 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2307 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2308 | ext4_msg(sb, KERN_CRIT, | 2308 | ext4_msg(sb, KERN_CRIT, |
2309 | "This should not happen!! Data will be lost\n"); | 2309 | "This should not happen!! Data will be lost\n"); |
2310 | if (err == -ENOSPC) | 2310 | if (err == -ENOSPC) |
2311 | ext4_print_free_blocks(mpd->inode); | 2311 | ext4_print_free_blocks(mpd->inode); |
2312 | } | 2312 | } |
2313 | /* invalidate all the pages */ | 2313 | /* invalidate all the pages */ |
2314 | ext4_da_block_invalidatepages(mpd); | 2314 | ext4_da_block_invalidatepages(mpd); |
2315 | 2315 | ||
2316 | /* Mark this page range as having been completed */ | 2316 | /* Mark this page range as having been completed */ |
2317 | mpd->io_done = 1; | 2317 | mpd->io_done = 1; |
2318 | return; | 2318 | return; |
2319 | } | 2319 | } |
2320 | BUG_ON(blks == 0); | 2320 | BUG_ON(blks == 0); |
2321 | 2321 | ||
2322 | mapp = ↦ | 2322 | mapp = ↦ |
2323 | if (map.m_flags & EXT4_MAP_NEW) { | 2323 | if (map.m_flags & EXT4_MAP_NEW) { |
2324 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2324 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2325 | int i; | 2325 | int i; |
2326 | 2326 | ||
2327 | for (i = 0; i < map.m_len; i++) | 2327 | for (i = 0; i < map.m_len; i++) |
2328 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2328 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2329 | } | 2329 | } |
2330 | 2330 | ||
2331 | if (ext4_should_order_data(mpd->inode)) { | 2331 | if (ext4_should_order_data(mpd->inode)) { |
2332 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2332 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2333 | if (err) | 2333 | if (err) |
2334 | /* This only happens if the journal is aborted */ | 2334 | /* This only happens if the journal is aborted */ |
2335 | return; | 2335 | return; |
2336 | } | 2336 | } |
2337 | 2337 | ||
2338 | /* | 2338 | /* |
2339 | * Update on-disk size along with block allocation. | 2339 | * Update on-disk size along with block allocation. |
2340 | */ | 2340 | */ |
2341 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | 2341 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; |
2342 | if (disksize > i_size_read(mpd->inode)) | 2342 | if (disksize > i_size_read(mpd->inode)) |
2343 | disksize = i_size_read(mpd->inode); | 2343 | disksize = i_size_read(mpd->inode); |
2344 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2344 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2345 | ext4_update_i_disksize(mpd->inode, disksize); | 2345 | ext4_update_i_disksize(mpd->inode, disksize); |
2346 | err = ext4_mark_inode_dirty(handle, mpd->inode); | 2346 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2347 | if (err) | 2347 | if (err) |
2348 | ext4_error(mpd->inode->i_sb, | 2348 | ext4_error(mpd->inode->i_sb, |
2349 | "Failed to mark inode %lu dirty", | 2349 | "Failed to mark inode %lu dirty", |
2350 | mpd->inode->i_ino); | 2350 | mpd->inode->i_ino); |
2351 | } | 2351 | } |
2352 | 2352 | ||
2353 | submit_io: | 2353 | submit_io: |
2354 | mpage_da_submit_io(mpd, mapp); | 2354 | mpage_da_submit_io(mpd, mapp); |
2355 | mpd->io_done = 1; | 2355 | mpd->io_done = 1; |
2356 | } | 2356 | } |
2357 | 2357 | ||
/* Buffer state bits tracked per accumulated extent by the delalloc code */
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
		  (1 << BH_Delay) | (1 << BH_Unwritten))
2360 | 2360 | ||
2361 | /* | 2361 | /* |
2362 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | 2362 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks |
2363 | * | 2363 | * |
2364 | * @mpd->lbh - extent of blocks | 2364 | * @mpd->lbh - extent of blocks |
2365 | * @logical - logical number of the block in the file | 2365 | * @logical - logical number of the block in the file |
2366 | * @bh - bh of the block (used to access block's state) | 2366 | * @bh - bh of the block (used to access block's state) |
2367 | * | 2367 | * |
2368 | * the function is used to collect contig. blocks in same state | 2368 | * the function is used to collect contig. blocks in same state |
2369 | */ | 2369 | */ |
2370 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 2370 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
2371 | sector_t logical, size_t b_size, | 2371 | sector_t logical, size_t b_size, |
2372 | unsigned long b_state) | 2372 | unsigned long b_state) |
2373 | { | 2373 | { |
2374 | sector_t next; | 2374 | sector_t next; |
2375 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; | 2375 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; |
2376 | 2376 | ||
2377 | /* | 2377 | /* |
2378 | * XXX Don't go larger than mballoc is willing to allocate | 2378 | * XXX Don't go larger than mballoc is willing to allocate |
2379 | * This is a stopgap solution. We eventually need to fold | 2379 | * This is a stopgap solution. We eventually need to fold |
2380 | * mpage_da_submit_io() into this function and then call | 2380 | * mpage_da_submit_io() into this function and then call |
2381 | * ext4_map_blocks() multiple times in a loop | 2381 | * ext4_map_blocks() multiple times in a loop |
2382 | */ | 2382 | */ |
2383 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | 2383 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) |
2384 | goto flush_it; | 2384 | goto flush_it; |
2385 | 2385 | ||
2386 | /* check if thereserved journal credits might overflow */ | 2386 | /* check if thereserved journal credits might overflow */ |
2387 | if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { | 2387 | if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { |
2388 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | 2388 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { |
2389 | /* | 2389 | /* |
2390 | * With non-extent format we are limited by the journal | 2390 | * With non-extent format we are limited by the journal |
2391 | * credit available. Total credit needed to insert | 2391 | * credit available. Total credit needed to insert |
2392 | * nrblocks contiguous blocks is dependent on the | 2392 | * nrblocks contiguous blocks is dependent on the |
2393 | * nrblocks. So limit nrblocks. | 2393 | * nrblocks. So limit nrblocks. |
2394 | */ | 2394 | */ |
2395 | goto flush_it; | 2395 | goto flush_it; |
2396 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | 2396 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > |
2397 | EXT4_MAX_TRANS_DATA) { | 2397 | EXT4_MAX_TRANS_DATA) { |
2398 | /* | 2398 | /* |
2399 | * Adding the new buffer_head would make it cross the | 2399 | * Adding the new buffer_head would make it cross the |
2400 | * allowed limit for which we have journal credit | 2400 | * allowed limit for which we have journal credit |
2401 | * reserved. So limit the new bh->b_size | 2401 | * reserved. So limit the new bh->b_size |
2402 | */ | 2402 | */ |
2403 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | 2403 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << |
2404 | mpd->inode->i_blkbits; | 2404 | mpd->inode->i_blkbits; |
2405 | /* we will do mpage_da_submit_io in the next loop */ | 2405 | /* we will do mpage_da_submit_io in the next loop */ |
2406 | } | 2406 | } |
2407 | } | 2407 | } |
2408 | /* | 2408 | /* |
2409 | * First block in the extent | 2409 | * First block in the extent |
2410 | */ | 2410 | */ |
2411 | if (mpd->b_size == 0) { | 2411 | if (mpd->b_size == 0) { |
2412 | mpd->b_blocknr = logical; | 2412 | mpd->b_blocknr = logical; |
2413 | mpd->b_size = b_size; | 2413 | mpd->b_size = b_size; |
2414 | mpd->b_state = b_state & BH_FLAGS; | 2414 | mpd->b_state = b_state & BH_FLAGS; |
2415 | return; | 2415 | return; |
2416 | } | 2416 | } |
2417 | 2417 | ||
2418 | next = mpd->b_blocknr + nrblocks; | 2418 | next = mpd->b_blocknr + nrblocks; |
2419 | /* | 2419 | /* |
2420 | * Can we merge the block to our big extent? | 2420 | * Can we merge the block to our big extent? |
2421 | */ | 2421 | */ |
2422 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { | 2422 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { |
2423 | mpd->b_size += b_size; | 2423 | mpd->b_size += b_size; |
2424 | return; | 2424 | return; |
2425 | } | 2425 | } |
2426 | 2426 | ||
2427 | flush_it: | 2427 | flush_it: |
2428 | /* | 2428 | /* |
2429 | * We couldn't merge the block to our extent, so we | 2429 | * We couldn't merge the block to our extent, so we |
2430 | * need to flush current extent and start new one | 2430 | * need to flush current extent and start new one |
2431 | */ | 2431 | */ |
2432 | mpage_da_map_and_submit(mpd); | 2432 | mpage_da_map_and_submit(mpd); |
2433 | return; | 2433 | return; |
2434 | } | 2434 | } |
2435 | 2435 | ||
2436 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 2436 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
2437 | { | 2437 | { |
2438 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 2438 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
2439 | } | 2439 | } |
2440 | 2440 | ||
2441 | /* | 2441 | /* |
2442 | * This is a special get_blocks_t callback which is used by | 2442 | * This is a special get_blocks_t callback which is used by |
2443 | * ext4_da_write_begin(). It will either return mapped block or | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2444 | * reserve space for a single block. | 2444 | * reserve space for a single block. |
2445 | * | 2445 | * |
2446 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. | 2446 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. |
2447 | * We also have b_blocknr = -1 and b_bdev initialized properly | 2447 | * We also have b_blocknr = -1 and b_bdev initialized properly |
2448 | * | 2448 | * |
2449 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. | 2449 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. |
2450 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev | 2450 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev |
2451 | * initialized properly. | 2451 | * initialized properly. |
2452 | */ | 2452 | */ |
2453 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2453 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2454 | struct buffer_head *bh, int create) | 2454 | struct buffer_head *bh, int create) |
2455 | { | 2455 | { |
2456 | struct ext4_map_blocks map; | 2456 | struct ext4_map_blocks map; |
2457 | int ret = 0; | 2457 | int ret = 0; |
2458 | sector_t invalid_block = ~((sector_t) 0xffff); | 2458 | sector_t invalid_block = ~((sector_t) 0xffff); |
2459 | 2459 | ||
2460 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | 2460 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) |
2461 | invalid_block = ~0; | 2461 | invalid_block = ~0; |
2462 | 2462 | ||
2463 | BUG_ON(create == 0); | 2463 | BUG_ON(create == 0); |
2464 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); | 2464 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
2465 | 2465 | ||
2466 | map.m_lblk = iblock; | 2466 | map.m_lblk = iblock; |
2467 | map.m_len = 1; | 2467 | map.m_len = 1; |
2468 | 2468 | ||
2469 | /* | 2469 | /* |
2470 | * first, we need to know whether the block is allocated already | 2470 | * first, we need to know whether the block is allocated already |
2471 | * preallocated blocks are unmapped but should treated | 2471 | * preallocated blocks are unmapped but should treated |
2472 | * the same as allocated blocks. | 2472 | * the same as allocated blocks. |
2473 | */ | 2473 | */ |
2474 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 2474 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
2475 | if (ret < 0) | 2475 | if (ret < 0) |
2476 | return ret; | 2476 | return ret; |
2477 | if (ret == 0) { | 2477 | if (ret == 0) { |
2478 | if (buffer_delay(bh)) | 2478 | if (buffer_delay(bh)) |
2479 | return 0; /* Not sure this could or should happen */ | 2479 | return 0; /* Not sure this could or should happen */ |
2480 | /* | 2480 | /* |
2481 | * XXX: __block_write_begin() unmaps passed block, is it OK? | 2481 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2482 | */ | 2482 | */ |
2483 | ret = ext4_da_reserve_space(inode, iblock); | 2483 | ret = ext4_da_reserve_space(inode, iblock); |
2484 | if (ret) | 2484 | if (ret) |
2485 | /* not enough space to reserve */ | 2485 | /* not enough space to reserve */ |
2486 | return ret; | 2486 | return ret; |
2487 | 2487 | ||
2488 | map_bh(bh, inode->i_sb, invalid_block); | 2488 | map_bh(bh, inode->i_sb, invalid_block); |
2489 | set_buffer_new(bh); | 2489 | set_buffer_new(bh); |
2490 | set_buffer_delay(bh); | 2490 | set_buffer_delay(bh); |
2491 | return 0; | 2491 | return 0; |
2492 | } | 2492 | } |
2493 | 2493 | ||
2494 | map_bh(bh, inode->i_sb, map.m_pblk); | 2494 | map_bh(bh, inode->i_sb, map.m_pblk); |
2495 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 2495 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
2496 | 2496 | ||
2497 | if (buffer_unwritten(bh)) { | 2497 | if (buffer_unwritten(bh)) { |
2498 | /* A delayed write to unwritten bh should be marked | 2498 | /* A delayed write to unwritten bh should be marked |
2499 | * new and mapped. Mapped ensures that we don't do | 2499 | * new and mapped. Mapped ensures that we don't do |
2500 | * get_block multiple times when we write to the same | 2500 | * get_block multiple times when we write to the same |
2501 | * offset and new ensures that we do proper zero out | 2501 | * offset and new ensures that we do proper zero out |
2502 | * for partial write. | 2502 | * for partial write. |
2503 | */ | 2503 | */ |
2504 | set_buffer_new(bh); | 2504 | set_buffer_new(bh); |
2505 | set_buffer_mapped(bh); | 2505 | set_buffer_mapped(bh); |
2506 | } | 2506 | } |
2507 | return 0; | 2507 | return 0; |
2508 | } | 2508 | } |
2509 | 2509 | ||
2510 | /* | 2510 | /* |
2511 | * This function is used as a standard get_block_t calback function | 2511 | * This function is used as a standard get_block_t calback function |
2512 | * when there is no desire to allocate any blocks. It is used as a | 2512 | * when there is no desire to allocate any blocks. It is used as a |
2513 | * callback function for block_write_begin() and block_write_full_page(). | 2513 | * callback function for block_write_begin() and block_write_full_page(). |
2514 | * These functions should only try to map a single block at a time. | 2514 | * These functions should only try to map a single block at a time. |
2515 | * | 2515 | * |
2516 | * Since this function doesn't do block allocations even if the caller | 2516 | * Since this function doesn't do block allocations even if the caller |
2517 | * requests it by passing in create=1, it is critically important that | 2517 | * requests it by passing in create=1, it is critically important that |
2518 | * any caller checks to make sure that any buffer heads are returned | 2518 | * any caller checks to make sure that any buffer heads are returned |
2519 | * by this function are either all already mapped or marked for | 2519 | * by this function are either all already mapped or marked for |
2520 | * delayed allocation before calling block_write_full_page(). Otherwise, | 2520 | * delayed allocation before calling block_write_full_page(). Otherwise, |
2521 | * b_blocknr could be left unitialized, and the page write functions will | 2521 | * b_blocknr could be left unitialized, and the page write functions will |
2522 | * be taken by surprise. | 2522 | * be taken by surprise. |
2523 | */ | 2523 | */ |
2524 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2524 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2525 | struct buffer_head *bh_result, int create) | 2525 | struct buffer_head *bh_result, int create) |
2526 | { | 2526 | { |
2527 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2527 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); |
2528 | return _ext4_get_block(inode, iblock, bh_result, 0); | 2528 | return _ext4_get_block(inode, iblock, bh_result, 0); |
2529 | } | 2529 | } |
2530 | 2530 | ||
2531 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2531 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
2532 | { | 2532 | { |
2533 | get_bh(bh); | 2533 | get_bh(bh); |
2534 | return 0; | 2534 | return 0; |
2535 | } | 2535 | } |
2536 | 2536 | ||
2537 | static int bput_one(handle_t *handle, struct buffer_head *bh) | 2537 | static int bput_one(handle_t *handle, struct buffer_head *bh) |
2538 | { | 2538 | { |
2539 | put_bh(bh); | 2539 | put_bh(bh); |
2540 | return 0; | 2540 | return 0; |
2541 | } | 2541 | } |
2542 | 2542 | ||
2543 | static int __ext4_journalled_writepage(struct page *page, | 2543 | static int __ext4_journalled_writepage(struct page *page, |
2544 | unsigned int len) | 2544 | unsigned int len) |
2545 | { | 2545 | { |
2546 | struct address_space *mapping = page->mapping; | 2546 | struct address_space *mapping = page->mapping; |
2547 | struct inode *inode = mapping->host; | 2547 | struct inode *inode = mapping->host; |
2548 | struct buffer_head *page_bufs; | 2548 | struct buffer_head *page_bufs; |
2549 | handle_t *handle = NULL; | 2549 | handle_t *handle = NULL; |
2550 | int ret = 0; | 2550 | int ret = 0; |
2551 | int err; | 2551 | int err; |
2552 | 2552 | ||
2553 | ClearPageChecked(page); | 2553 | ClearPageChecked(page); |
2554 | page_bufs = page_buffers(page); | 2554 | page_bufs = page_buffers(page); |
2555 | BUG_ON(!page_bufs); | 2555 | BUG_ON(!page_bufs); |
2556 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2556 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
2557 | /* As soon as we unlock the page, it can go away, but we have | 2557 | /* As soon as we unlock the page, it can go away, but we have |
2558 | * references to buffers so we are safe */ | 2558 | * references to buffers so we are safe */ |
2559 | unlock_page(page); | 2559 | unlock_page(page); |
2560 | 2560 | ||
2561 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2561 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
2562 | if (IS_ERR(handle)) { | 2562 | if (IS_ERR(handle)) { |
2563 | ret = PTR_ERR(handle); | 2563 | ret = PTR_ERR(handle); |
2564 | goto out; | 2564 | goto out; |
2565 | } | 2565 | } |
2566 | 2566 | ||
2567 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 2567 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, |
2568 | do_journal_get_write_access); | 2568 | do_journal_get_write_access); |
2569 | 2569 | ||
2570 | err = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 2570 | err = walk_page_buffers(handle, page_bufs, 0, len, NULL, |
2571 | write_end_fn); | 2571 | write_end_fn); |
2572 | if (ret == 0) | 2572 | if (ret == 0) |
2573 | ret = err; | 2573 | ret = err; |
2574 | err = ext4_journal_stop(handle); | 2574 | err = ext4_journal_stop(handle); |
2575 | if (!ret) | 2575 | if (!ret) |
2576 | ret = err; | 2576 | ret = err; |
2577 | 2577 | ||
2578 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 2578 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); |
2579 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 2579 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
2580 | out: | 2580 | out: |
2581 | return ret; | 2581 | return ret; |
2582 | } | 2582 | } |
2583 | 2583 | ||
2584 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | 2584 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); |
2585 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | 2585 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); |
2586 | 2586 | ||
2587 | /* | 2587 | /* |
2588 | * Note that we don't need to start a transaction unless we're journaling data | 2588 | * Note that we don't need to start a transaction unless we're journaling data |
2589 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2589 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
2590 | * need to file the inode to the transaction's list in ordered mode because if | 2590 | * need to file the inode to the transaction's list in ordered mode because if |
2591 | * we are writing back data added by write(), the inode is already there and if | 2591 | * we are writing back data added by write(), the inode is already there and if |
2592 | * we are writing back data modified via mmap(), no one guarantees in which | 2592 | * we are writing back data modified via mmap(), no one guarantees in which |
2593 | * transaction the data will hit the disk. In case we are journaling data, we | 2593 | * transaction the data will hit the disk. In case we are journaling data, we |
2594 | * cannot start transaction directly because transaction start ranks above page | 2594 | * cannot start transaction directly because transaction start ranks above page |
2595 | * lock so we have to do some magic. | 2595 | * lock so we have to do some magic. |
2596 | * | 2596 | * |
2597 | * This function can get called via... | 2597 | * This function can get called via... |
2598 | * - ext4_da_writepages after taking page lock (have journal handle) | 2598 | * - ext4_da_writepages after taking page lock (have journal handle) |
2599 | * - journal_submit_inode_data_buffers (no journal handle) | 2599 | * - journal_submit_inode_data_buffers (no journal handle) |
2600 | * - shrink_page_list via pdflush (no journal handle) | 2600 | * - shrink_page_list via pdflush (no journal handle) |
2601 | * - grab_page_cache when doing write_begin (have journal handle) | 2601 | * - grab_page_cache when doing write_begin (have journal handle) |
2602 | * | 2602 | * |
2603 | * We don't do any block allocation in this function. If we have page with | 2603 | * We don't do any block allocation in this function. If we have page with |
2604 | * multiple blocks we need to write those buffer_heads that are mapped. This | 2604 | * multiple blocks we need to write those buffer_heads that are mapped. This |
2605 | * is important for mmaped based write. So if we do with blocksize 1K | 2605 | * is important for mmaped based write. So if we do with blocksize 1K |
2606 | * truncate(f, 1024); | 2606 | * truncate(f, 1024); |
2607 | * a = mmap(f, 0, 4096); | 2607 | * a = mmap(f, 0, 4096); |
2608 | * a[0] = 'a'; | 2608 | * a[0] = 'a'; |
2609 | * truncate(f, 4096); | 2609 | * truncate(f, 4096); |
2610 | * we have in the page first buffer_head mapped via page_mkwrite call back | 2610 | * we have in the page first buffer_head mapped via page_mkwrite call back |
2611 | * but other bufer_heads would be unmapped but dirty(dirty done via the | 2611 | * but other bufer_heads would be unmapped but dirty(dirty done via the |
2612 | * do_wp_page). So writepage should write the first block. If we modify | 2612 | * do_wp_page). So writepage should write the first block. If we modify |
2613 | * the mmap area beyond 1024 we will again get a page_fault and the | 2613 | * the mmap area beyond 1024 we will again get a page_fault and the |
2614 | * page_mkwrite callback will do the block allocation and mark the | 2614 | * page_mkwrite callback will do the block allocation and mark the |
2615 | * buffer_heads mapped. | 2615 | * buffer_heads mapped. |
2616 | * | 2616 | * |
2617 | * We redirty the page if we have any buffer_heads that is either delay or | 2617 | * We redirty the page if we have any buffer_heads that is either delay or |
2618 | * unwritten in the page. | 2618 | * unwritten in the page. |
2619 | * | 2619 | * |
2620 | * We can get recursively called as show below. | 2620 | * We can get recursively called as show below. |
2621 | * | 2621 | * |
2622 | * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | 2622 | * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> |
2623 | * ext4_writepage() | 2623 | * ext4_writepage() |
2624 | * | 2624 | * |
2625 | * But since we don't do any block allocation we should not deadlock. | 2625 | * But since we don't do any block allocation we should not deadlock. |
2626 | * Page also have the dirty flag cleared so we don't get recurive page_lock. | 2626 | * Page also have the dirty flag cleared so we don't get recurive page_lock. |
2627 | */ | 2627 | */ |
/*
 * Write out a single page without doing any block allocation: only
 * buffers that are already mapped (or delay/unwritten, which we refuse
 * and redirty) are submitted.  See the block comment above for the
 * call sites and the reasoning.
 */
static int ext4_writepage(struct page *page,
			  struct writeback_control *wbc)
{
	int ret = 0, commit_write = 0;
	loff_t size;
	unsigned int len;
	struct buffer_head *page_bufs = NULL;
	struct inode *inode = page->mapping->host;

	trace_ext4_writepage(page);
	size = i_size_read(inode);
	/* On the last page only write the bytes up to i_size. */
	if (page->index == size >> PAGE_CACHE_SHIFT)
		len = size & ~PAGE_CACHE_MASK;
	else
		len = PAGE_CACHE_SIZE;

	/*
	 * If the page does not have buffers (for whatever reason),
	 * try to create them using __block_write_begin.  If this
	 * fails, redirty the page and move on.
	 */
	if (!page_has_buffers(page)) {
		if (__block_write_begin(page, 0, len,
					noalloc_get_block_write)) {
		redirty_page:
			redirty_page_for_writepage(wbc, page);
			unlock_page(page);
			return 0;
		}
		commit_write = 1;
	}
	page_bufs = page_buffers(page);
	if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
			      ext4_bh_delay_or_unwritten)) {
		/*
		 * We don't want to do block allocation, so redirty
		 * the page and return.  We may reach here when we do
		 * a journal commit via journal_submit_inode_data_buffers.
		 * We can also reach here via shrink_page_list
		 */
		goto redirty_page;
	}
	if (commit_write)
		/* now mark the buffer_heads as dirty and uptodate */
		block_commit_write(page, 0, len);

	if (PageChecked(page) && ext4_should_journal_data(inode))
		/*
		 * It's mmapped pagecache.  Add buffers and journal it.  There
		 * doesn't seem much point in redirtying the page here.
		 */
		return __ext4_journalled_writepage(page, len);

	if (buffer_uninit(page_bufs)) {
		/*
		 * Unwritten-extent buffers need the special end_io
		 * handler so extent conversion happens on completion.
		 */
		ext4_set_bh_endio(page_bufs, inode);
		ret = block_write_full_page_endio(page, noalloc_get_block_write,
					wbc, ext4_end_io_buffer_write);
	} else
		ret = block_write_full_page(page, noalloc_get_block_write,
					    wbc);

	return ret;
}
2691 | 2691 | ||
2692 | /* | 2692 | /* |
2693 | * This is called via ext4_da_writepages() to | 2693 | * This is called via ext4_da_writepages() to |
2694 | * calculate the total number of credits to reserve to fit | 2694 | * calculate the total number of credits to reserve to fit |
2695 | * a single extent allocation into a single transaction, | 2695 | * a single extent allocation into a single transaction, |
2696 | * ext4_da_writpeages() will loop calling this before | 2696 | * ext4_da_writpeages() will loop calling this before |
2697 | * the block allocation. | 2697 | * the block allocation. |
2698 | */ | 2698 | */ |
2699 | 2699 | ||
2700 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | 2700 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2701 | { | 2701 | { |
2702 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 2702 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
2703 | 2703 | ||
2704 | /* | 2704 | /* |
2705 | * With non-extent format the journal credit needed to | 2705 | * With non-extent format the journal credit needed to |
2706 | * insert nrblocks contiguous block is dependent on | 2706 | * insert nrblocks contiguous block is dependent on |
2707 | * number of contiguous block. So we will limit | 2707 | * number of contiguous block. So we will limit |
2708 | * number of contiguous block to a sane value | 2708 | * number of contiguous block to a sane value |
2709 | */ | 2709 | */ |
2710 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && | 2710 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && |
2711 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2711 | (max_blocks > EXT4_MAX_TRANS_DATA)) |
2712 | max_blocks = EXT4_MAX_TRANS_DATA; | 2712 | max_blocks = EXT4_MAX_TRANS_DATA; |
2713 | 2713 | ||
2714 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2714 | return ext4_chunk_trans_blocks(inode, max_blocks); |
2715 | } | 2715 | } |
2716 | 2716 | ||
2717 | /* | 2717 | /* |
2718 | * write_cache_pages_da - walk the list of dirty pages of the given | 2718 | * write_cache_pages_da - walk the list of dirty pages of the given |
2719 | * address space and accumulate pages that need writing, and call | 2719 | * address space and accumulate pages that need writing, and call |
2720 | * mpage_da_map_and_submit to map a single contiguous memory region | 2720 | * mpage_da_map_and_submit to map a single contiguous memory region |
2721 | * and then write them. | 2721 | * and then write them. |
2722 | */ | 2722 | */ |
/*
 * write_cache_pages_da - walk the list of dirty pages of the given
 * address space and accumulate pages that need writing, and call
 * mpage_da_map_and_submit to map a single contiguous memory region
 * and then write them.
 *
 * On return *done_index is one past the last page examined, so the
 * caller can resume a range_cyclic scan there.  Returns 0 when the
 * range is exhausted, or MPAGE_DA_EXTENT_TAIL once an extent has been
 * accumulated/submitted and the caller should loop.
 */
static int write_cache_pages_da(struct address_space *mapping,
				struct writeback_control *wbc,
				struct mpage_da_data *mpd,
				pgoff_t *done_index)
{
	struct buffer_head *bh, *head;
	struct inode *inode = mapping->host;
	struct pagevec pvec;
	unsigned int nr_pages;
	sector_t logical;
	pgoff_t index, end;
	long nr_to_write = wbc->nr_to_write;
	int i, tag, ret = 0;

	memset(mpd, 0, sizeof(struct mpage_da_data));
	mpd->wbc = wbc;
	mpd->inode = inode;
	pagevec_init(&pvec, 0);
	index = wbc->range_start >> PAGE_CACHE_SHIFT;
	end = wbc->range_end >> PAGE_CACHE_SHIFT;

	/*
	 * Integrity sync scans only pages tagged TOWRITE (set up by
	 * tag_pages_for_writeback in the caller) so concurrently
	 * dirtied pages don't make the sync livelock.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;

	*done_index = index;
	while (index <= end) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			return 0;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end)
				goto out;

			*done_index = page->index + 1;

			/*
			 * If we can't merge this page, and we have
			 * accumulated an contiguous region, write it
			 */
			if ((mpd->next_page != page->index) &&
			    (mpd->next_page != mpd->first_page)) {
				mpage_da_map_and_submit(mpd);
				goto ret_extent_tail;
			}

			lock_page(page);

			/*
			 * If the page is no longer dirty, or its
			 * mapping no longer corresponds to inode we
			 * are writing (which means it has been
			 * truncated or invalidated), or the page is
			 * already under writeback and we are not
			 * doing a data integrity writeback, skip the page
			 */
			if (!PageDirty(page) ||
			    (PageWriteback(page) &&
			     (wbc->sync_mode == WB_SYNC_NONE)) ||
			    unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			wait_on_page_writeback(page);
			BUG_ON(PageWriteback(page));

			/* Start a fresh extent if this page is not adjacent. */
			if (mpd->next_page != page->index)
				mpd->first_page = page->index;
			mpd->next_page = page->index + 1;
			logical = (sector_t) page->index <<
				(PAGE_CACHE_SHIFT - inode->i_blkbits);

			if (!page_has_buffers(page)) {
				mpage_add_bh_to_extent(mpd, logical,
						       PAGE_CACHE_SIZE,
						       (1 << BH_Dirty) | (1 << BH_Uptodate));
				if (mpd->io_done)
					goto ret_extent_tail;
			} else {
				/*
				 * Page with regular buffer heads,
				 * just add all dirty ones
				 */
				head = page_buffers(page);
				bh = head;
				do {
					BUG_ON(buffer_locked(bh));
					/*
					 * We need to try to allocate
					 * unmapped blocks in the same page.
					 * Otherwise we won't make progress
					 * with the page in ext4_writepage
					 */
					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
						mpage_add_bh_to_extent(mpd, logical,
								       bh->b_size,
								       bh->b_state);
						if (mpd->io_done)
							goto ret_extent_tail;
					} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
						/*
						 * mapped dirty buffer. We need
						 * to update the b_state
						 * because we look at b_state
						 * in mpage_da_map_blocks. We
						 * don't update b_size because
						 * if we find an unmapped
						 * buffer_head later we need to
						 * use the b_state flag of that
						 * buffer_head.
						 */
						if (mpd->b_size == 0)
							mpd->b_state = bh->b_state & BH_FLAGS;
					}
					logical++;
				} while ((bh = bh->b_this_page) != head);
			}

			if (nr_to_write > 0) {
				nr_to_write--;
				if (nr_to_write == 0 &&
				    wbc->sync_mode == WB_SYNC_NONE)
					/*
					 * We stop writing back only if we are
					 * not doing integrity sync. In case of
					 * integrity sync we have to keep going
					 * because someone may be concurrently
					 * dirtying pages, and we might have
					 * synced a lot of newly appeared dirty
					 * pages, but have not synced all of the
					 * old dirty pages.
					 */
					goto out;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return 0;
ret_extent_tail:
	ret = MPAGE_DA_EXTENT_TAIL;
out:
	pagevec_release(&pvec);
	cond_resched();
	return ret;
}
2882 | 2882 | ||
2883 | 2883 | ||
/*
 * Delayed-allocation writepages: repeatedly start a transaction with
 * enough credits for one extent allocation, collect a contiguous run
 * of dirty pages via write_cache_pages_da(), map and submit them, and
 * loop until nr_to_write is consumed or the range is clean.
 */
static int ext4_da_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	pgoff_t	index;
	int range_whole = 0;
	handle_t *handle = NULL;
	struct mpage_da_data mpd;
	struct inode *inode = mapping->host;
	int pages_written = 0;
	unsigned int max_pages;
	int range_cyclic, cycled = 1, io_done = 0;
	int needed_blocks, ret = 0;
	long desired_nr_to_write, nr_to_writebump = 0;
	loff_t range_start = wbc->range_start;
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
	pgoff_t done_index = 0;
	pgoff_t end;

	trace_ext4_da_writepages(inode, wbc);

	/*
	 * No pages to write? This is mainly a kludge to avoid starting
	 * a transaction for special inodes like journal inode on last iput()
	 * because that could violate lock ordering on umount
	 */
	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	/*
	 * If the filesystem has aborted, it is read-only, so return
	 * right away instead of dumping stack traces later on that
	 * will obscure the real source of the problem.  We test
	 * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because
	 * the latter could be true if the filesystem is mounted
	 * read-only, and in that case, ext4_da_writepages should
	 * *never* be called, so if that ever happens, we would want
	 * the stack trace.
	 */
	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
		return -EROFS;

	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
		range_whole = 1;

	/*
	 * For cyclic writeback, temporarily rewrite the wbc range to
	 * [writeback_index, EOF]; the original settings are restored
	 * before returning.
	 */
	range_cyclic = wbc->range_cyclic;
	if (wbc->range_cyclic) {
		index = mapping->writeback_index;
		if (index)
			cycled = 0;
		/*
		 * NOTE(review): index is pgoff_t, so on 32-bit this
		 * shift appears to happen in unsigned long before the
		 * assignment to the 64-bit range_start — confirm it
		 * can't lose high bits for large files.
		 */
		wbc->range_start = index << PAGE_CACHE_SHIFT;
		wbc->range_end  = LLONG_MAX;
		wbc->range_cyclic = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
	}

	/*
	 * This works around two forms of stupidity.  The first is in
	 * the writeback code, which caps the maximum number of pages
	 * written to be 1024 pages.  This is wrong on multiple
	 * levels; different architectues have a different page size,
	 * which changes the maximum amount of data which gets
	 * written.  Secondly, 4 megabytes is way too small.  XFS
	 * forces this value to be 16 megabytes by multiplying
	 * nr_to_write parameter by four, and then relies on its
	 * allocator to allocate larger extents to make them
	 * contiguous.  Unfortunately this brings us to the second
	 * stupidity, which is that ext4's mballoc code only allocates
	 * at most 2048 blocks.  So we force contiguous writes up to
	 * the number of dirty blocks in the inode, or
	 * sbi->max_writeback_mb_bump whichever is smaller.
	 */
	max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
	if (!range_cyclic && range_whole) {
		if (wbc->nr_to_write == LONG_MAX)
			desired_nr_to_write = wbc->nr_to_write;
		else
			desired_nr_to_write = wbc->nr_to_write * 8;
	} else
		desired_nr_to_write = ext4_num_dirty_pages(inode, index,
							   max_pages);
	if (desired_nr_to_write > max_pages)
		desired_nr_to_write = max_pages;

	if (wbc->nr_to_write < desired_nr_to_write) {
		/* Remember the bump so it can be subtracted on exit. */
		nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
		wbc->nr_to_write = desired_nr_to_write;
	}

retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);

	while (!ret && wbc->nr_to_write > 0) {

		/*
		 * we insert one extent at a time. So we need
		 * credit needed for single extent allocation.
		 * journalled mode is currently not supported
		 * by delalloc
		 */
		BUG_ON(ext4_should_journal_data(inode));
		needed_blocks = ext4_da_writepages_trans_blocks(inode);

		/* start a new transaction*/
		handle = ext4_journal_start(inode, needed_blocks);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
			       "%ld pages, ino %lu; err %d", __func__,
				wbc->nr_to_write, inode->i_ino, ret);
			goto out_writepages;
		}

		/*
		 * Now call write_cache_pages_da() to find the next
		 * contiguous region of logical blocks that need
		 * blocks to be allocated by ext4 and submit them.
		 */
		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
		/*
		 * If we have a contiguous extent of pages and we
		 * haven't done the I/O yet, map the blocks and submit
		 * them for I/O.
		 */
		if (!mpd.io_done && mpd.next_page != mpd.first_page) {
			mpage_da_map_and_submit(&mpd);
			ret = MPAGE_DA_EXTENT_TAIL;
		}
		trace_ext4_da_write_pages(inode, &mpd);
		wbc->nr_to_write -= mpd.pages_written;

		ext4_journal_stop(handle);

		if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
			/* commit the transaction which would
			 * free blocks released in the transaction
			 * and try again
			 */
			jbd2_journal_force_commit_nested(sbi->s_journal);
			ret = 0;
		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
			/*
			 * got one extent now try with
			 * rest of the pages
			 */
			pages_written += mpd.pages_written;
			ret = 0;
			io_done = 1;
		} else if (wbc->nr_to_write)
			/*
			 * There is no more writeout needed
			 * or we requested for a noblocking writeout
			 * and we found the device congested
			 */
			break;
	}
	/* Cyclic scan hit EOF without writing anything: wrap to the start. */
	if (!io_done && !cycled) {
		cycled = 1;
		index = 0;
		wbc->range_start = index << PAGE_CACHE_SHIFT;
		wbc->range_end  = mapping->writeback_index - 1;
		goto retry;
	}

	/* Update index */
	wbc->range_cyclic = range_cyclic;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		/*
		 * set the writeback_index so that range_cyclic
		 * mode will write it back later
		 */
		mapping->writeback_index = done_index;

out_writepages:
	/* Restore the caller-visible wbc fields we adjusted above. */
	wbc->nr_to_write -= nr_to_writebump;
	wbc->range_start = range_start;
	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
	return ret;
}
3066 | 3066 | ||
3067 | #define FALL_BACK_TO_NONDELALLOC 1 | 3067 | #define FALL_BACK_TO_NONDELALLOC 1 |
3068 | static int ext4_nonda_switch(struct super_block *sb) | 3068 | static int ext4_nonda_switch(struct super_block *sb) |
3069 | { | 3069 | { |
3070 | s64 free_blocks, dirty_blocks; | 3070 | s64 free_blocks, dirty_blocks; |
3071 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3071 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3072 | 3072 | ||
3073 | /* | 3073 | /* |
3074 | * switch to non delalloc mode if we are running low | 3074 | * switch to non delalloc mode if we are running low |
3075 | * on free block. The free block accounting via percpu | 3075 | * on free block. The free block accounting via percpu |
3076 | * counters can get slightly wrong with percpu_counter_batch getting | 3076 | * counters can get slightly wrong with percpu_counter_batch getting |
3077 | * accumulated on each CPU without updating global counters | 3077 | * accumulated on each CPU without updating global counters |
3078 | * Delalloc need an accurate free block accounting. So switch | 3078 | * Delalloc need an accurate free block accounting. So switch |
3079 | * to non delalloc when we are near to error range. | 3079 | * to non delalloc when we are near to error range. |
3080 | */ | 3080 | */ |
3081 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 3081 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); |
3082 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | 3082 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); |
3083 | if (2 * free_blocks < 3 * dirty_blocks || | 3083 | if (2 * free_blocks < 3 * dirty_blocks || |
3084 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 3084 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { |
3085 | /* | 3085 | /* |
3086 | * free block count is less than 150% of dirty blocks | 3086 | * free block count is less than 150% of dirty blocks |
3087 | * or free blocks is less than watermark | 3087 | * or free blocks is less than watermark |
3088 | */ | 3088 | */ |
3089 | return 1; | 3089 | return 1; |
3090 | } | 3090 | } |
3091 | /* | 3091 | /* |
3092 | * Even if we don't switch but are nearing capacity, | 3092 | * Even if we don't switch but are nearing capacity, |
3093 | * start pushing delalloc when 1/2 of free blocks are dirty. | 3093 | * start pushing delalloc when 1/2 of free blocks are dirty. |
3094 | */ | 3094 | */ |
3095 | if (free_blocks < 2 * dirty_blocks) | 3095 | if (free_blocks < 2 * dirty_blocks) |
3096 | writeback_inodes_sb_if_idle(sb); | 3096 | writeback_inodes_sb_if_idle(sb); |
3097 | 3097 | ||
3098 | return 0; | 3098 | return 0; |
3099 | } | 3099 | } |
3100 | 3100 | ||
3101 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 3101 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
3102 | loff_t pos, unsigned len, unsigned flags, | 3102 | loff_t pos, unsigned len, unsigned flags, |
3103 | struct page **pagep, void **fsdata) | 3103 | struct page **pagep, void **fsdata) |
3104 | { | 3104 | { |
3105 | int ret, retries = 0; | 3105 | int ret, retries = 0; |
3106 | struct page *page; | 3106 | struct page *page; |
3107 | pgoff_t index; | 3107 | pgoff_t index; |
3108 | struct inode *inode = mapping->host; | 3108 | struct inode *inode = mapping->host; |
3109 | handle_t *handle; | 3109 | handle_t *handle; |
3110 | 3110 | ||
3111 | index = pos >> PAGE_CACHE_SHIFT; | 3111 | index = pos >> PAGE_CACHE_SHIFT; |
3112 | 3112 | ||
3113 | if (ext4_nonda_switch(inode->i_sb)) { | 3113 | if (ext4_nonda_switch(inode->i_sb)) { |
3114 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 3114 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
3115 | return ext4_write_begin(file, mapping, pos, | 3115 | return ext4_write_begin(file, mapping, pos, |
3116 | len, flags, pagep, fsdata); | 3116 | len, flags, pagep, fsdata); |
3117 | } | 3117 | } |
3118 | *fsdata = (void *)0; | 3118 | *fsdata = (void *)0; |
3119 | trace_ext4_da_write_begin(inode, pos, len, flags); | 3119 | trace_ext4_da_write_begin(inode, pos, len, flags); |
3120 | retry: | 3120 | retry: |
3121 | /* | 3121 | /* |
3122 | * With delayed allocation, we don't log the i_disksize update | 3122 | * With delayed allocation, we don't log the i_disksize update |
3123 | * if there is delayed block allocation. But we still need | 3123 | * if there is delayed block allocation. But we still need |
3124 | * to journalling the i_disksize update if writes to the end | 3124 | * to journalling the i_disksize update if writes to the end |
3125 | * of file which has an already mapped buffer. | 3125 | * of file which has an already mapped buffer. |
3126 | */ | 3126 | */ |
3127 | handle = ext4_journal_start(inode, 1); | 3127 | handle = ext4_journal_start(inode, 1); |
3128 | if (IS_ERR(handle)) { | 3128 | if (IS_ERR(handle)) { |
3129 | ret = PTR_ERR(handle); | 3129 | ret = PTR_ERR(handle); |
3130 | goto out; | 3130 | goto out; |
3131 | } | 3131 | } |
3132 | /* We cannot recurse into the filesystem as the transaction is already | 3132 | /* We cannot recurse into the filesystem as the transaction is already |
3133 | * started */ | 3133 | * started */ |
3134 | flags |= AOP_FLAG_NOFS; | 3134 | flags |= AOP_FLAG_NOFS; |
3135 | 3135 | ||
3136 | page = grab_cache_page_write_begin(mapping, index, flags); | 3136 | page = grab_cache_page_write_begin(mapping, index, flags); |
3137 | if (!page) { | 3137 | if (!page) { |
3138 | ext4_journal_stop(handle); | 3138 | ext4_journal_stop(handle); |
3139 | ret = -ENOMEM; | 3139 | ret = -ENOMEM; |
3140 | goto out; | 3140 | goto out; |
3141 | } | 3141 | } |
3142 | *pagep = page; | 3142 | *pagep = page; |
3143 | 3143 | ||
3144 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); | 3144 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
3145 | if (ret < 0) { | 3145 | if (ret < 0) { |
3146 | unlock_page(page); | 3146 | unlock_page(page); |
3147 | ext4_journal_stop(handle); | 3147 | ext4_journal_stop(handle); |
3148 | page_cache_release(page); | 3148 | page_cache_release(page); |
3149 | /* | 3149 | /* |
3150 | * block_write_begin may have instantiated a few blocks | 3150 | * block_write_begin may have instantiated a few blocks |
3151 | * outside i_size. Trim these off again. Don't need | 3151 | * outside i_size. Trim these off again. Don't need |
3152 | * i_size_read because we hold i_mutex. | 3152 | * i_size_read because we hold i_mutex. |
3153 | */ | 3153 | */ |
3154 | if (pos + len > inode->i_size) | 3154 | if (pos + len > inode->i_size) |
3155 | ext4_truncate_failed_write(inode); | 3155 | ext4_truncate_failed_write(inode); |
3156 | } | 3156 | } |
3157 | 3157 | ||
3158 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3158 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3159 | goto retry; | 3159 | goto retry; |
3160 | out: | 3160 | out: |
3161 | return ret; | 3161 | return ret; |
3162 | } | 3162 | } |
3163 | 3163 | ||
3164 | /* | 3164 | /* |
3165 | * Check if we should update i_disksize | 3165 | * Check if we should update i_disksize |
3166 | * when write to the end of file but not require block allocation | 3166 | * when write to the end of file but not require block allocation |
3167 | */ | 3167 | */ |
3168 | static int ext4_da_should_update_i_disksize(struct page *page, | 3168 | static int ext4_da_should_update_i_disksize(struct page *page, |
3169 | unsigned long offset) | 3169 | unsigned long offset) |
3170 | { | 3170 | { |
3171 | struct buffer_head *bh; | 3171 | struct buffer_head *bh; |
3172 | struct inode *inode = page->mapping->host; | 3172 | struct inode *inode = page->mapping->host; |
3173 | unsigned int idx; | 3173 | unsigned int idx; |
3174 | int i; | 3174 | int i; |
3175 | 3175 | ||
3176 | bh = page_buffers(page); | 3176 | bh = page_buffers(page); |
3177 | idx = offset >> inode->i_blkbits; | 3177 | idx = offset >> inode->i_blkbits; |
3178 | 3178 | ||
3179 | for (i = 0; i < idx; i++) | 3179 | for (i = 0; i < idx; i++) |
3180 | bh = bh->b_this_page; | 3180 | bh = bh->b_this_page; |
3181 | 3181 | ||
3182 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) | 3182 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) |
3183 | return 0; | 3183 | return 0; |
3184 | return 1; | 3184 | return 1; |
3185 | } | 3185 | } |
3186 | 3186 | ||
3187 | static int ext4_da_write_end(struct file *file, | 3187 | static int ext4_da_write_end(struct file *file, |
3188 | struct address_space *mapping, | 3188 | struct address_space *mapping, |
3189 | loff_t pos, unsigned len, unsigned copied, | 3189 | loff_t pos, unsigned len, unsigned copied, |
3190 | struct page *page, void *fsdata) | 3190 | struct page *page, void *fsdata) |
3191 | { | 3191 | { |
3192 | struct inode *inode = mapping->host; | 3192 | struct inode *inode = mapping->host; |
3193 | int ret = 0, ret2; | 3193 | int ret = 0, ret2; |
3194 | handle_t *handle = ext4_journal_current_handle(); | 3194 | handle_t *handle = ext4_journal_current_handle(); |
3195 | loff_t new_i_size; | 3195 | loff_t new_i_size; |
3196 | unsigned long start, end; | 3196 | unsigned long start, end; |
3197 | int write_mode = (int)(unsigned long)fsdata; | 3197 | int write_mode = (int)(unsigned long)fsdata; |
3198 | 3198 | ||
3199 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 3199 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
3200 | if (ext4_should_order_data(inode)) { | 3200 | if (ext4_should_order_data(inode)) { |
3201 | return ext4_ordered_write_end(file, mapping, pos, | 3201 | return ext4_ordered_write_end(file, mapping, pos, |
3202 | len, copied, page, fsdata); | 3202 | len, copied, page, fsdata); |
3203 | } else if (ext4_should_writeback_data(inode)) { | 3203 | } else if (ext4_should_writeback_data(inode)) { |
3204 | return ext4_writeback_write_end(file, mapping, pos, | 3204 | return ext4_writeback_write_end(file, mapping, pos, |
3205 | len, copied, page, fsdata); | 3205 | len, copied, page, fsdata); |
3206 | } else { | 3206 | } else { |
3207 | BUG(); | 3207 | BUG(); |
3208 | } | 3208 | } |
3209 | } | 3209 | } |
3210 | 3210 | ||
3211 | trace_ext4_da_write_end(inode, pos, len, copied); | 3211 | trace_ext4_da_write_end(inode, pos, len, copied); |
3212 | start = pos & (PAGE_CACHE_SIZE - 1); | 3212 | start = pos & (PAGE_CACHE_SIZE - 1); |
3213 | end = start + copied - 1; | 3213 | end = start + copied - 1; |
3214 | 3214 | ||
3215 | /* | 3215 | /* |
3216 | * generic_write_end() will run mark_inode_dirty() if i_size | 3216 | * generic_write_end() will run mark_inode_dirty() if i_size |
3217 | * changes. So let's piggyback the i_disksize mark_inode_dirty | 3217 | * changes. So let's piggyback the i_disksize mark_inode_dirty |
3218 | * into that. | 3218 | * into that. |
3219 | */ | 3219 | */ |
3220 | 3220 | ||
3221 | new_i_size = pos + copied; | 3221 | new_i_size = pos + copied; |
3222 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 3222 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
3223 | if (ext4_da_should_update_i_disksize(page, end)) { | 3223 | if (ext4_da_should_update_i_disksize(page, end)) { |
3224 | down_write(&EXT4_I(inode)->i_data_sem); | 3224 | down_write(&EXT4_I(inode)->i_data_sem); |
3225 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 3225 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
3226 | /* | 3226 | /* |
3227 | * Updating i_disksize when extending file | 3227 | * Updating i_disksize when extending file |
3228 | * without needing block allocation | 3228 | * without needing block allocation |
3229 | */ | 3229 | */ |
3230 | if (ext4_should_order_data(inode)) | 3230 | if (ext4_should_order_data(inode)) |
3231 | ret = ext4_jbd2_file_inode(handle, | 3231 | ret = ext4_jbd2_file_inode(handle, |
3232 | inode); | 3232 | inode); |
3233 | 3233 | ||
3234 | EXT4_I(inode)->i_disksize = new_i_size; | 3234 | EXT4_I(inode)->i_disksize = new_i_size; |
3235 | } | 3235 | } |
3236 | up_write(&EXT4_I(inode)->i_data_sem); | 3236 | up_write(&EXT4_I(inode)->i_data_sem); |
3237 | /* We need to mark inode dirty even if | 3237 | /* We need to mark inode dirty even if |
3238 | * new_i_size is less that inode->i_size | 3238 | * new_i_size is less that inode->i_size |
3239 | * bu greater than i_disksize.(hint delalloc) | 3239 | * bu greater than i_disksize.(hint delalloc) |
3240 | */ | 3240 | */ |
3241 | ext4_mark_inode_dirty(handle, inode); | 3241 | ext4_mark_inode_dirty(handle, inode); |
3242 | } | 3242 | } |
3243 | } | 3243 | } |
3244 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 3244 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
3245 | page, fsdata); | 3245 | page, fsdata); |
3246 | copied = ret2; | 3246 | copied = ret2; |
3247 | if (ret2 < 0) | 3247 | if (ret2 < 0) |
3248 | ret = ret2; | 3248 | ret = ret2; |
3249 | ret2 = ext4_journal_stop(handle); | 3249 | ret2 = ext4_journal_stop(handle); |
3250 | if (!ret) | 3250 | if (!ret) |
3251 | ret = ret2; | 3251 | ret = ret2; |
3252 | 3252 | ||
3253 | return ret ? ret : copied; | 3253 | return ret ? ret : copied; |
3254 | } | 3254 | } |
3255 | 3255 | ||
/*
 * ->invalidatepage for the delayed-allocation path: release any
 * delalloc block reservations held by the page's buffers, then hand
 * off to the regular invalidate routine.
 */
static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(!PageLocked(page));

	/* Drop reserved blocks backing this page, if any. */
	if (page_has_buffers(page))
		ext4_da_page_release_reservation(page, offset);

	ext4_invalidatepage(page, offset);
}
3272 | 3272 | ||
3273 | /* | 3273 | /* |
3274 | * Force all delayed allocation blocks to be allocated for a given inode. | 3274 | * Force all delayed allocation blocks to be allocated for a given inode. |
3275 | */ | 3275 | */ |
3276 | int ext4_alloc_da_blocks(struct inode *inode) | 3276 | int ext4_alloc_da_blocks(struct inode *inode) |
3277 | { | 3277 | { |
3278 | trace_ext4_alloc_da_blocks(inode); | 3278 | trace_ext4_alloc_da_blocks(inode); |
3279 | 3279 | ||
3280 | if (!EXT4_I(inode)->i_reserved_data_blocks && | 3280 | if (!EXT4_I(inode)->i_reserved_data_blocks && |
3281 | !EXT4_I(inode)->i_reserved_meta_blocks) | 3281 | !EXT4_I(inode)->i_reserved_meta_blocks) |
3282 | return 0; | 3282 | return 0; |
3283 | 3283 | ||
3284 | /* | 3284 | /* |
3285 | * We do something simple for now. The filemap_flush() will | 3285 | * We do something simple for now. The filemap_flush() will |
3286 | * also start triggering a write of the data blocks, which is | 3286 | * also start triggering a write of the data blocks, which is |
3287 | * not strictly speaking necessary (and for users of | 3287 | * not strictly speaking necessary (and for users of |
3288 | * laptop_mode, not even desirable). However, to do otherwise | 3288 | * laptop_mode, not even desirable). However, to do otherwise |
3289 | * would require replicating code paths in: | 3289 | * would require replicating code paths in: |
3290 | * | 3290 | * |
3291 | * ext4_da_writepages() -> | 3291 | * ext4_da_writepages() -> |
3292 | * write_cache_pages() ---> (via passed in callback function) | 3292 | * write_cache_pages() ---> (via passed in callback function) |
3293 | * __mpage_da_writepage() --> | 3293 | * __mpage_da_writepage() --> |
3294 | * mpage_add_bh_to_extent() | 3294 | * mpage_add_bh_to_extent() |
3295 | * mpage_da_map_blocks() | 3295 | * mpage_da_map_blocks() |
3296 | * | 3296 | * |
3297 | * The problem is that write_cache_pages(), located in | 3297 | * The problem is that write_cache_pages(), located in |
3298 | * mm/page-writeback.c, marks pages clean in preparation for | 3298 | * mm/page-writeback.c, marks pages clean in preparation for |
3299 | * doing I/O, which is not desirable if we're not planning on | 3299 | * doing I/O, which is not desirable if we're not planning on |
3300 | * doing I/O at all. | 3300 | * doing I/O at all. |
3301 | * | 3301 | * |
3302 | * We could call write_cache_pages(), and then redirty all of | 3302 | * We could call write_cache_pages(), and then redirty all of |
3303 | * the pages by calling redirty_page_for_writepage() but that | 3303 | * the pages by calling redirty_page_for_writepage() but that |
3304 | * would be ugly in the extreme. So instead we would need to | 3304 | * would be ugly in the extreme. So instead we would need to |
3305 | * replicate parts of the code in the above functions, | 3305 | * replicate parts of the code in the above functions, |
3306 | * simplifying them because we wouldn't actually intend to | 3306 | * simplifying them because we wouldn't actually intend to |
3307 | * write out the pages, but rather only collect contiguous | 3307 | * write out the pages, but rather only collect contiguous |
3308 | * logical block extents, call the multi-block allocator, and | 3308 | * logical block extents, call the multi-block allocator, and |
3309 | * then update the buffer heads with the block allocations. | 3309 | * then update the buffer heads with the block allocations. |
3310 | * | 3310 | * |
3311 | * For now, though, we'll cheat by calling filemap_flush(), | 3311 | * For now, though, we'll cheat by calling filemap_flush(), |
3312 | * which will map the blocks, and start the I/O, but not | 3312 | * which will map the blocks, and start the I/O, but not |
3313 | * actually wait for the I/O to complete. | 3313 | * actually wait for the I/O to complete. |
3314 | */ | 3314 | */ |
3315 | return filemap_flush(inode->i_mapping); | 3315 | return filemap_flush(inode->i_mapping); |
3316 | } | 3316 | } |
3317 | 3317 | ||
3318 | /* | 3318 | /* |
3319 | * bmap() is special. It gets used by applications such as lilo and by | 3319 | * bmap() is special. It gets used by applications such as lilo and by |
3320 | * the swapper to find the on-disk block of a specific piece of data. | 3320 | * the swapper to find the on-disk block of a specific piece of data. |
3321 | * | 3321 | * |
3322 | * Naturally, this is dangerous if the block concerned is still in the | 3322 | * Naturally, this is dangerous if the block concerned is still in the |
3323 | * journal. If somebody makes a swapfile on an ext4 data-journaling | 3323 | * journal. If somebody makes a swapfile on an ext4 data-journaling |
3324 | * filesystem and enables swap, then they may get a nasty shock when the | 3324 | * filesystem and enables swap, then they may get a nasty shock when the |
3325 | * data getting swapped to that swapfile suddenly gets overwritten by | 3325 | * data getting swapped to that swapfile suddenly gets overwritten by |
3326 | * the original zero's written out previously to the journal and | 3326 | * the original zero's written out previously to the journal and |
3327 | * awaiting writeback in the kernel's buffer cache. | 3327 | * awaiting writeback in the kernel's buffer cache. |
3328 | * | 3328 | * |
3329 | * So, if we see any bmap calls here on a modified, data-journaled file, | 3329 | * So, if we see any bmap calls here on a modified, data-journaled file, |
3330 | * take extra steps to flush any blocks which might be in the cache. | 3330 | * take extra steps to flush any blocks which might be in the cache. |
3331 | */ | 3331 | */ |
3332 | static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | 3332 | static sector_t ext4_bmap(struct address_space *mapping, sector_t block) |
3333 | { | 3333 | { |
3334 | struct inode *inode = mapping->host; | 3334 | struct inode *inode = mapping->host; |
3335 | journal_t *journal; | 3335 | journal_t *journal; |
3336 | int err; | 3336 | int err; |
3337 | 3337 | ||
3338 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | 3338 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && |
3339 | test_opt(inode->i_sb, DELALLOC)) { | 3339 | test_opt(inode->i_sb, DELALLOC)) { |
3340 | /* | 3340 | /* |
3341 | * With delalloc we want to sync the file | 3341 | * With delalloc we want to sync the file |
3342 | * so that we can make sure we allocate | 3342 | * so that we can make sure we allocate |
3343 | * blocks for file | 3343 | * blocks for file |
3344 | */ | 3344 | */ |
3345 | filemap_write_and_wait(mapping); | 3345 | filemap_write_and_wait(mapping); |
3346 | } | 3346 | } |
3347 | 3347 | ||
3348 | if (EXT4_JOURNAL(inode) && | 3348 | if (EXT4_JOURNAL(inode) && |
3349 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { | 3349 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { |
3350 | /* | 3350 | /* |
3351 | * This is a REALLY heavyweight approach, but the use of | 3351 | * This is a REALLY heavyweight approach, but the use of |
3352 | * bmap on dirty files is expected to be extremely rare: | 3352 | * bmap on dirty files is expected to be extremely rare: |
3353 | * only if we run lilo or swapon on a freshly made file | 3353 | * only if we run lilo or swapon on a freshly made file |
3354 | * do we expect this to happen. | 3354 | * do we expect this to happen. |
3355 | * | 3355 | * |
3356 | * (bmap requires CAP_SYS_RAWIO so this does not | 3356 | * (bmap requires CAP_SYS_RAWIO so this does not |
3357 | * represent an unprivileged user DOS attack --- we'd be | 3357 | * represent an unprivileged user DOS attack --- we'd be |
3358 | * in trouble if mortal users could trigger this path at | 3358 | * in trouble if mortal users could trigger this path at |
3359 | * will.) | 3359 | * will.) |
3360 | * | 3360 | * |
3361 | * NB. EXT4_STATE_JDATA is not set on files other than | 3361 | * NB. EXT4_STATE_JDATA is not set on files other than |
3362 | * regular files. If somebody wants to bmap a directory | 3362 | * regular files. If somebody wants to bmap a directory |
3363 | * or symlink and gets confused because the buffer | 3363 | * or symlink and gets confused because the buffer |
3364 | * hasn't yet been flushed to disk, they deserve | 3364 | * hasn't yet been flushed to disk, they deserve |
3365 | * everything they get. | 3365 | * everything they get. |
3366 | */ | 3366 | */ |
3367 | 3367 | ||
3368 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); | 3368 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); |
3369 | journal = EXT4_JOURNAL(inode); | 3369 | journal = EXT4_JOURNAL(inode); |
3370 | jbd2_journal_lock_updates(journal); | 3370 | jbd2_journal_lock_updates(journal); |
3371 | err = jbd2_journal_flush(journal); | 3371 | err = jbd2_journal_flush(journal); |
3372 | jbd2_journal_unlock_updates(journal); | 3372 | jbd2_journal_unlock_updates(journal); |
3373 | 3373 | ||
3374 | if (err) | 3374 | if (err) |
3375 | return 0; | 3375 | return 0; |
3376 | } | 3376 | } |
3377 | 3377 | ||
3378 | return generic_block_bmap(mapping, block, ext4_get_block); | 3378 | return generic_block_bmap(mapping, block, ext4_get_block); |
3379 | } | 3379 | } |
3380 | 3380 | ||
3381 | static int ext4_readpage(struct file *file, struct page *page) | 3381 | static int ext4_readpage(struct file *file, struct page *page) |
3382 | { | 3382 | { |
3383 | trace_ext4_readpage(page); | 3383 | trace_ext4_readpage(page); |
3384 | return mpage_readpage(page, ext4_get_block); | 3384 | return mpage_readpage(page, ext4_get_block); |
3385 | } | 3385 | } |
3386 | 3386 | ||
3387 | static int | 3387 | static int |
3388 | ext4_readpages(struct file *file, struct address_space *mapping, | 3388 | ext4_readpages(struct file *file, struct address_space *mapping, |
3389 | struct list_head *pages, unsigned nr_pages) | 3389 | struct list_head *pages, unsigned nr_pages) |
3390 | { | 3390 | { |
3391 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3391 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3392 | } | 3392 | } |
3393 | 3393 | ||
3394 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3394 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3395 | { | 3395 | { |
3396 | struct buffer_head *head, *bh; | 3396 | struct buffer_head *head, *bh; |
3397 | unsigned int curr_off = 0; | 3397 | unsigned int curr_off = 0; |
3398 | 3398 | ||
3399 | if (!page_has_buffers(page)) | 3399 | if (!page_has_buffers(page)) |
3400 | return; | 3400 | return; |
3401 | head = bh = page_buffers(page); | 3401 | head = bh = page_buffers(page); |
3402 | do { | 3402 | do { |
3403 | if (offset <= curr_off && test_clear_buffer_uninit(bh) | 3403 | if (offset <= curr_off && test_clear_buffer_uninit(bh) |
3404 | && bh->b_private) { | 3404 | && bh->b_private) { |
3405 | ext4_free_io_end(bh->b_private); | 3405 | ext4_free_io_end(bh->b_private); |
3406 | bh->b_private = NULL; | 3406 | bh->b_private = NULL; |
3407 | bh->b_end_io = NULL; | 3407 | bh->b_end_io = NULL; |
3408 | } | 3408 | } |
3409 | curr_off = curr_off + bh->b_size; | 3409 | curr_off = curr_off + bh->b_size; |
3410 | bh = bh->b_this_page; | 3410 | bh = bh->b_this_page; |
3411 | } while (bh != head); | 3411 | } while (bh != head); |
3412 | } | 3412 | } |
3413 | 3413 | ||
3414 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 3414 | static void ext4_invalidatepage(struct page *page, unsigned long offset) |
3415 | { | 3415 | { |
3416 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3416 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3417 | 3417 | ||
3418 | trace_ext4_invalidatepage(page, offset); | 3418 | trace_ext4_invalidatepage(page, offset); |
3419 | 3419 | ||
3420 | /* | 3420 | /* |
3421 | * free any io_end structure allocated for buffers to be discarded | 3421 | * free any io_end structure allocated for buffers to be discarded |
3422 | */ | 3422 | */ |
3423 | if (ext4_should_dioread_nolock(page->mapping->host)) | 3423 | if (ext4_should_dioread_nolock(page->mapping->host)) |
3424 | ext4_invalidatepage_free_endio(page, offset); | 3424 | ext4_invalidatepage_free_endio(page, offset); |
3425 | /* | 3425 | /* |
3426 | * If it's a full truncate we just forget about the pending dirtying | 3426 | * If it's a full truncate we just forget about the pending dirtying |
3427 | */ | 3427 | */ |
3428 | if (offset == 0) | 3428 | if (offset == 0) |
3429 | ClearPageChecked(page); | 3429 | ClearPageChecked(page); |
3430 | 3430 | ||
3431 | if (journal) | 3431 | if (journal) |
3432 | jbd2_journal_invalidatepage(journal, page, offset); | 3432 | jbd2_journal_invalidatepage(journal, page, offset); |
3433 | else | 3433 | else |
3434 | block_invalidatepage(page, offset); | 3434 | block_invalidatepage(page, offset); |
3435 | } | 3435 | } |
3436 | 3436 | ||
3437 | static int ext4_releasepage(struct page *page, gfp_t wait) | 3437 | static int ext4_releasepage(struct page *page, gfp_t wait) |
3438 | { | 3438 | { |
3439 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3439 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3440 | 3440 | ||
3441 | trace_ext4_releasepage(page); | 3441 | trace_ext4_releasepage(page); |
3442 | 3442 | ||
3443 | WARN_ON(PageChecked(page)); | 3443 | WARN_ON(PageChecked(page)); |
3444 | if (!page_has_buffers(page)) | 3444 | if (!page_has_buffers(page)) |
3445 | return 0; | 3445 | return 0; |
3446 | if (journal) | 3446 | if (journal) |
3447 | return jbd2_journal_try_to_free_buffers(journal, page, wait); | 3447 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
3448 | else | 3448 | else |
3449 | return try_to_free_buffers(page); | 3449 | return try_to_free_buffers(page); |
3450 | } | 3450 | } |
3451 | 3451 | ||
3452 | /* | 3452 | /* |
3453 | * O_DIRECT for ext3 (or indirect map) based files | 3453 | * O_DIRECT for ext3 (or indirect map) based files |
3454 | * | 3454 | * |
3455 | * If the O_DIRECT write will extend the file then add this inode to the | 3455 | * If the O_DIRECT write will extend the file then add this inode to the |
3456 | * orphan list. So recovery will truncate it back to the original size | 3456 | * orphan list. So recovery will truncate it back to the original size |
3457 | * if the machine crashes during the write. | 3457 | * if the machine crashes during the write. |
3458 | * | 3458 | * |
3459 | * If the O_DIRECT write is intantiating holes inside i_size and the machine | 3459 | * If the O_DIRECT write is intantiating holes inside i_size and the machine |
3460 | * crashes then stale disk data _may_ be exposed inside the file. But current | 3460 | * crashes then stale disk data _may_ be exposed inside the file. But current |
3461 | * VFS code falls back into buffered path in that case so we are safe. | 3461 | * VFS code falls back into buffered path in that case so we are safe. |
3462 | */ | 3462 | */ |
3463 | static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | 3463 | static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, |
3464 | const struct iovec *iov, loff_t offset, | 3464 | const struct iovec *iov, loff_t offset, |
3465 | unsigned long nr_segs) | 3465 | unsigned long nr_segs) |
3466 | { | 3466 | { |
3467 | struct file *file = iocb->ki_filp; | 3467 | struct file *file = iocb->ki_filp; |
3468 | struct inode *inode = file->f_mapping->host; | 3468 | struct inode *inode = file->f_mapping->host; |
3469 | struct ext4_inode_info *ei = EXT4_I(inode); | 3469 | struct ext4_inode_info *ei = EXT4_I(inode); |
3470 | handle_t *handle; | 3470 | handle_t *handle; |
3471 | ssize_t ret; | 3471 | ssize_t ret; |
3472 | int orphan = 0; | 3472 | int orphan = 0; |
3473 | size_t count = iov_length(iov, nr_segs); | 3473 | size_t count = iov_length(iov, nr_segs); |
3474 | int retries = 0; | 3474 | int retries = 0; |
3475 | 3475 | ||
3476 | if (rw == WRITE) { | 3476 | if (rw == WRITE) { |
3477 | loff_t final_size = offset + count; | 3477 | loff_t final_size = offset + count; |
3478 | 3478 | ||
3479 | if (final_size > inode->i_size) { | 3479 | if (final_size > inode->i_size) { |
3480 | /* Credits for sb + inode write */ | 3480 | /* Credits for sb + inode write */ |
3481 | handle = ext4_journal_start(inode, 2); | 3481 | handle = ext4_journal_start(inode, 2); |
3482 | if (IS_ERR(handle)) { | 3482 | if (IS_ERR(handle)) { |
3483 | ret = PTR_ERR(handle); | 3483 | ret = PTR_ERR(handle); |
3484 | goto out; | 3484 | goto out; |
3485 | } | 3485 | } |
3486 | ret = ext4_orphan_add(handle, inode); | 3486 | ret = ext4_orphan_add(handle, inode); |
3487 | if (ret) { | 3487 | if (ret) { |
3488 | ext4_journal_stop(handle); | 3488 | ext4_journal_stop(handle); |
3489 | goto out; | 3489 | goto out; |
3490 | } | 3490 | } |
3491 | orphan = 1; | 3491 | orphan = 1; |
3492 | ei->i_disksize = inode->i_size; | 3492 | ei->i_disksize = inode->i_size; |
3493 | ext4_journal_stop(handle); | 3493 | ext4_journal_stop(handle); |
3494 | } | 3494 | } |
3495 | } | 3495 | } |
3496 | 3496 | ||
3497 | retry: | 3497 | retry: |
3498 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 3498 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3499 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3499 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3500 | inode->i_sb->s_bdev, iov, | 3500 | inode->i_sb->s_bdev, iov, |
3501 | offset, nr_segs, | 3501 | offset, nr_segs, |
3502 | ext4_get_block, NULL, NULL, 0); | 3502 | ext4_get_block, NULL, NULL, 0); |
3503 | else { | 3503 | else { |
3504 | ret = blockdev_direct_IO(rw, iocb, inode, | 3504 | ret = blockdev_direct_IO(rw, iocb, inode, |
3505 | inode->i_sb->s_bdev, iov, | 3505 | inode->i_sb->s_bdev, iov, |
3506 | offset, nr_segs, | 3506 | offset, nr_segs, |
3507 | ext4_get_block, NULL); | 3507 | ext4_get_block, NULL); |
3508 | 3508 | ||
3509 | if (unlikely((rw & WRITE) && ret < 0)) { | 3509 | if (unlikely((rw & WRITE) && ret < 0)) { |
3510 | loff_t isize = i_size_read(inode); | 3510 | loff_t isize = i_size_read(inode); |
3511 | loff_t end = offset + iov_length(iov, nr_segs); | 3511 | loff_t end = offset + iov_length(iov, nr_segs); |
3512 | 3512 | ||
3513 | if (end > isize) | 3513 | if (end > isize) |
3514 | ext4_truncate_failed_write(inode); | 3514 | ext4_truncate_failed_write(inode); |
3515 | } | 3515 | } |
3516 | } | 3516 | } |
3517 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3517 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3518 | goto retry; | 3518 | goto retry; |
3519 | 3519 | ||
3520 | if (orphan) { | 3520 | if (orphan) { |
3521 | int err; | 3521 | int err; |
3522 | 3522 | ||
3523 | /* Credits for sb + inode write */ | 3523 | /* Credits for sb + inode write */ |
3524 | handle = ext4_journal_start(inode, 2); | 3524 | handle = ext4_journal_start(inode, 2); |
3525 | if (IS_ERR(handle)) { | 3525 | if (IS_ERR(handle)) { |
3526 | /* This is really bad luck. We've written the data | 3526 | /* This is really bad luck. We've written the data |
3527 | * but cannot extend i_size. Bail out and pretend | 3527 | * but cannot extend i_size. Bail out and pretend |
3528 | * the write failed... */ | 3528 | * the write failed... */ |
3529 | ret = PTR_ERR(handle); | 3529 | ret = PTR_ERR(handle); |
3530 | if (inode->i_nlink) | 3530 | if (inode->i_nlink) |
3531 | ext4_orphan_del(NULL, inode); | 3531 | ext4_orphan_del(NULL, inode); |
3532 | 3532 | ||
3533 | goto out; | 3533 | goto out; |
3534 | } | 3534 | } |
3535 | if (inode->i_nlink) | 3535 | if (inode->i_nlink) |
3536 | ext4_orphan_del(handle, inode); | 3536 | ext4_orphan_del(handle, inode); |
3537 | if (ret > 0) { | 3537 | if (ret > 0) { |
3538 | loff_t end = offset + ret; | 3538 | loff_t end = offset + ret; |
3539 | if (end > inode->i_size) { | 3539 | if (end > inode->i_size) { |
3540 | ei->i_disksize = end; | 3540 | ei->i_disksize = end; |
3541 | i_size_write(inode, end); | 3541 | i_size_write(inode, end); |
3542 | /* | 3542 | /* |
3543 | * We're going to return a positive `ret' | 3543 | * We're going to return a positive `ret' |
3544 | * here due to non-zero-length I/O, so there's | 3544 | * here due to non-zero-length I/O, so there's |
3545 | * no way of reporting error returns from | 3545 | * no way of reporting error returns from |
3546 | * ext4_mark_inode_dirty() to userspace. So | 3546 | * ext4_mark_inode_dirty() to userspace. So |
3547 | * ignore it. | 3547 | * ignore it. |
3548 | */ | 3548 | */ |
3549 | ext4_mark_inode_dirty(handle, inode); | 3549 | ext4_mark_inode_dirty(handle, inode); |
3550 | } | 3550 | } |
3551 | } | 3551 | } |
3552 | err = ext4_journal_stop(handle); | 3552 | err = ext4_journal_stop(handle); |
3553 | if (ret == 0) | 3553 | if (ret == 0) |
3554 | ret = err; | 3554 | ret = err; |
3555 | } | 3555 | } |
3556 | out: | 3556 | out: |
3557 | return ret; | 3557 | return ret; |
3558 | } | 3558 | } |
3559 | 3559 | ||
/*
 * ext4_get_block used when preparing for a DIO write or buffer write.
 * We allocate an uninitialized extent if blocks haven't been allocated.
 * The extent will be converted to initialized after the IO is complete.
 */
static int ext4_get_block_write(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create)
{
	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
		   inode->i_ino, create);
	/*
	 * Delegate to the common block-mapping helper, asking for an
	 * uninitialized (unwritten) extent so stale data is never exposed
	 * to readers before the direct I/O completes.
	 */
	return _ext4_get_block(inode, iblock, bh_result,
			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
3573 | 3573 | ||
/*
 * Completion callback for ext4 direct I/O.
 *
 * If the write created unwritten extents, hand the io_end off to the
 * per-superblock dio_unwritten_wq workqueue, which will convert the
 * extents to written.  Otherwise free the io_end and, for async I/O,
 * complete the iocb here.
 */
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
			    ssize_t size, void *private, int ret,
			    bool is_async)
{
	ext4_io_end_t *io_end = iocb->private;
	struct workqueue_struct *wq;
	unsigned long flags;
	struct ext4_inode_info *ei;

	/* if not async direct IO or dio with 0 bytes write, just return */
	if (!io_end || !size)
		goto out;

	ext_debug("ext4_end_io_dio(): io_end 0x%p"
		  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
		  iocb->private, io_end->inode->i_ino, iocb, offset,
		  size);

	/* if not aio dio with unwritten extents, just free io and return */
	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
		ext4_free_io_end(io_end);
		iocb->private = NULL;
/*
 * NOTE: this label deliberately sits inside the branch above so that the
 * early "no io_end / zero size" exits also run the aio completion below.
 */
out:
		if (is_async)
			aio_complete(iocb, ret, 0);
		return;
	}

	/*
	 * Unwritten extents need converting; record where the I/O landed so
	 * the worker knows which range to convert.
	 */
	io_end->offset = offset;
	io_end->size = size;
	if (is_async) {
		/* the worker will complete the iocb once conversion is done */
		io_end->iocb = iocb;
		io_end->result = ret;
	}
	wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;

	/* Add the io_end to per-inode completed aio dio list*/
	ei = EXT4_I(io_end->inode);
	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
	list_add_tail(&io_end->list, &ei->i_completed_io_list);
	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

	/* queue the work to convert unwritten extents to written */
	queue_work(wq, &io_end->work);
	/* ownership of the io_end has passed to the workqueue */
	iocb->private = NULL;
}
3620 | 3620 | ||
/*
 * b_end_io callback for buffered writeback over unwritten extents
 * (installed by ext4_set_bh_endio).  Queues the attached io_end to the
 * dio_unwritten_wq so the extents get converted to written, then falls
 * through to the normal async-write completion.
 */
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
{
	ext4_io_end_t *io_end = bh->b_private;
	struct workqueue_struct *wq;
	struct inode *inode;
	unsigned long flags;

	/* only act if this buffer was marked uninit and has an io_end */
	if (!test_clear_buffer_uninit(bh) || !io_end)
		goto out;

	/* superblock going away: nothing to convert, just drop the io_end */
	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
		printk("sb umounted, discard end_io request for inode %lu\n",
			io_end->inode->i_ino);
		ext4_free_io_end(io_end);
		goto out;
	}

	io_end->flag = EXT4_IO_END_UNWRITTEN;
	inode = io_end->inode;

	/* Add the io_end to per-inode completed io list*/
	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);

	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
	/* queue the work to convert unwritten extents to written */
	queue_work(wq, &io_end->work);
out:
	/* detach our state and hand off to the stock async-write end_io */
	bh->b_private = NULL;
	bh->b_end_io = NULL;
	clear_buffer_uninit(bh);
	end_buffer_async_write(bh, uptodate);
}
3655 | 3655 | ||
/*
 * Attach an io_end to @bh and redirect its completion to
 * ext4_end_io_buffer_write(), so unwritten extents written through the
 * buffer path get converted after I/O finishes.  Always returns 0.
 */
static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
{
	ext4_io_end_t *io_end;
	struct page *page = bh->b_page;
	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
	size_t size = bh->b_size;

retry:
	/*
	 * GFP_ATOMIC here, retried with schedule() on failure — this spins
	 * until the allocation succeeds rather than reporting an error.
	 * NOTE(review): presumably chosen because this path cannot sleep in
	 * reclaim; confirm against callers.
	 */
	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
	if (!io_end) {
		pr_warn_ratelimited("%s: allocation fail\n", __func__);
		schedule();
		goto retry;
	}
	io_end->offset = offset;
	io_end->size = size;
	/*
	 * We need to hold a reference to the page to make sure it
	 * doesn't get evicted before ext4_end_io_work() has a chance
	 * to convert the extent from written to unwritten.
	 */
	io_end->page = page;
	get_page(io_end->page);

	bh->b_private = io_end;
	bh->b_end_io = ext4_end_io_buffer_write;
	return 0;
}
3684 | 3684 | ||
/*
 * For ext4 extent files, ext4 will do direct-io write to holes,
 * preallocated extents, and those write extend the file, no need to
 * fall back to buffered IO.
 *
 * For holes, we fallocate those blocks, mark them as uninitialized
 * If those blocks were preallocated, we make sure they are split, but
 * still keep the range to write as uninitialized.
 *
 * The unwritten extents will be converted to written when DIO is completed.
 * For async direct IO, since the IO may still be pending when we return, we
 * set up an end_io call back function, which will do the conversion
 * when async direct IO completed.
 *
 * If the O_DIRECT write will extend the file then add this inode to the
 * orphan list. So recovery will truncate it back to the original size
 * if the machine crashes during the write.
 *
 */
static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;
	size_t count = iov_length(iov, nr_segs);

	loff_t final_size = offset + count;
	if (rw == WRITE && final_size <= inode->i_size) {
		/*
		 * We could direct write to holes and fallocate.
		 *
		 * Allocated blocks to fill the hole are marked as uninitialized
		 * to prevent parallel buffered read to expose the stale data
		 * before DIO complete the data IO.
		 *
		 * As to previously fallocated extents, ext4 get_block
		 * will just simply mark the buffer mapped but still
		 * keep the extents uninitialized.
		 *
		 * for non AIO case, we will convert those unwritten extents
		 * to written after return back from blockdev_direct_IO.
		 *
		 * for async DIO, the conversion needs to be deferred when
		 * the IO is completed. The ext4 end_io callback function
		 * will be called to take care of the conversion work.
		 * Here for async case, we allocate an io_end structure to
		 * hook to the iocb.
		 */
		iocb->private = NULL;
		EXT4_I(inode)->cur_aio_dio = NULL;
		if (!is_sync_kiocb(iocb)) {
			iocb->private = ext4_init_io_end(inode, GFP_NOFS);
			if (!iocb->private)
				return -ENOMEM;
			/*
			 * we save the io structure for current async
			 * direct IO, so that later ext4_map_blocks()
			 * could flag the io structure whether there
			 * is a unwritten extents needs to be converted
			 * when IO is completed.
			 */
			EXT4_I(inode)->cur_aio_dio = iocb->private;
		}

		ret = blockdev_direct_IO(rw, iocb, inode,
					 inode->i_sb->s_bdev, iov,
					 offset, nr_segs,
					 ext4_get_block_write,
					 ext4_end_io_dio);
		if (iocb->private)
			EXT4_I(inode)->cur_aio_dio = NULL;
		/*
		 * The io_end structure takes a reference to the inode,
		 * that structure needs to be destroyed and the
		 * reference to the inode need to be dropped, when IO is
		 * complete, even with 0 byte write, or failed.
		 *
		 * In the successful AIO DIO case, the io_end structure will be
		 * destroyed and the reference to the inode will be dropped
		 * after the end_io call back function is called.
		 *
		 * In the case there is 0 byte write, or error case, since
		 * VFS direct IO won't invoke the end_io call back function,
		 * we need to free the end_io structure here.
		 */
		if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
			ext4_free_io_end(iocb->private);
			iocb->private = NULL;
		} else if (ret > 0 && ext4_test_inode_state(inode,
						EXT4_STATE_DIO_UNWRITTEN)) {
			int err;
			/*
			 * for non AIO case, since the IO is already
			 * completed, we could do the conversion right here
			 */
			err = ext4_convert_unwritten_extents(inode,
							     offset, ret);
			if (err < 0)
				ret = err;
			ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
		}
		return ret;
	}

	/* for writes past the end of file, we fall back to the old way */
	return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
}
3794 | 3794 | ||
3795 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 3795 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |
3796 | const struct iovec *iov, loff_t offset, | 3796 | const struct iovec *iov, loff_t offset, |
3797 | unsigned long nr_segs) | 3797 | unsigned long nr_segs) |
3798 | { | 3798 | { |
3799 | struct file *file = iocb->ki_filp; | 3799 | struct file *file = iocb->ki_filp; |
3800 | struct inode *inode = file->f_mapping->host; | 3800 | struct inode *inode = file->f_mapping->host; |
3801 | ssize_t ret; | 3801 | ssize_t ret; |
3802 | 3802 | ||
3803 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3803 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); |
3804 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3804 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3805 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3805 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3806 | else | 3806 | else |
3807 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3807 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3808 | trace_ext4_direct_IO_exit(inode, offset, | 3808 | trace_ext4_direct_IO_exit(inode, offset, |
3809 | iov_length(iov, nr_segs), rw, ret); | 3809 | iov_length(iov, nr_segs), rw, ret); |
3810 | return ret; | 3810 | return ret; |
3811 | } | 3811 | } |
3812 | 3812 | ||
/*
 * Pages can be dirtied completely asynchronously from ext4's journalling
 * activity (filemap_sync_pte(), try_to_unmap_one(), ...).  Because
 * ->set_page_dirty is called under VFS locks and the page is not
 * necessarily locked, we can neither leave the attached buffers clean
 * (buffer dirty state is definitive) nor dirty/jbddirty them here
 * without upsetting the journalling code.
 *
 * Instead, flag the page as "pending dirty" via PageChecked; the next
 * writepage call propagates that into the buffers appropriately.
 */
static int ext4_journalled_set_page_dirty(struct page *page)
{
	int ret;

	SetPageChecked(page);
	ret = __set_page_dirty_nobuffers(page);
	return ret;
}
3831 | 3831 | ||
/* address_space operations for data=ordered mode */
static const struct address_space_operations ext4_ordered_aops = {
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_ordered_write_end,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= ext4_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
3846 | 3846 | ||
/* address_space operations for data=writeback mode */
static const struct address_space_operations ext4_writeback_aops = {
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_writeback_write_end,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= ext4_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
3861 | 3861 | ||
/*
 * address_space operations for data=journal mode.  Note: no .direct_IO
 * or .migratepage here, and dirtying goes through
 * ext4_journalled_set_page_dirty to mark pages "pending dirty".
 */
static const struct address_space_operations ext4_journalled_aops = {
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_journalled_write_end,
	.set_page_dirty		= ext4_journalled_set_page_dirty,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
3875 | 3875 | ||
/* address_space operations when delayed allocation is enabled */
static const struct address_space_operations ext4_da_aops = {
	.readpage		= ext4_readpage,
	.readpages		= ext4_readpages,
	.writepage		= ext4_writepage,
	.writepages		= ext4_da_writepages,
	.write_begin		= ext4_da_write_begin,
	.write_end		= ext4_da_write_end,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_da_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= ext4_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate  = block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};
3891 | 3891 | ||
3892 | void ext4_set_aops(struct inode *inode) | 3892 | void ext4_set_aops(struct inode *inode) |
3893 | { | 3893 | { |
3894 | if (ext4_should_order_data(inode) && | 3894 | if (ext4_should_order_data(inode) && |
3895 | test_opt(inode->i_sb, DELALLOC)) | 3895 | test_opt(inode->i_sb, DELALLOC)) |
3896 | inode->i_mapping->a_ops = &ext4_da_aops; | 3896 | inode->i_mapping->a_ops = &ext4_da_aops; |
3897 | else if (ext4_should_order_data(inode)) | 3897 | else if (ext4_should_order_data(inode)) |
3898 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 3898 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
3899 | else if (ext4_should_writeback_data(inode) && | 3899 | else if (ext4_should_writeback_data(inode) && |
3900 | test_opt(inode->i_sb, DELALLOC)) | 3900 | test_opt(inode->i_sb, DELALLOC)) |
3901 | inode->i_mapping->a_ops = &ext4_da_aops; | 3901 | inode->i_mapping->a_ops = &ext4_da_aops; |
3902 | else if (ext4_should_writeback_data(inode)) | 3902 | else if (ext4_should_writeback_data(inode)) |
3903 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 3903 | inode->i_mapping->a_ops = &ext4_writeback_aops; |
3904 | else | 3904 | else |
3905 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3905 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
3906 | } | 3906 | } |
3907 | 3907 | ||
3908 | /* | 3908 | /* |
3909 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3909 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
3910 | * up to the end of the block which corresponds to `from'. | 3910 | * up to the end of the block which corresponds to `from'. |
3911 | * This required during truncate. We need to physically zero the tail end | 3911 | * This required during truncate. We need to physically zero the tail end |
3912 | * of that block so it doesn't yield old data if the file is later grown. | 3912 | * of that block so it doesn't yield old data if the file is later grown. |
3913 | */ | 3913 | */ |
3914 | int ext4_block_truncate_page(handle_t *handle, | 3914 | int ext4_block_truncate_page(handle_t *handle, |
3915 | struct address_space *mapping, loff_t from) | 3915 | struct address_space *mapping, loff_t from) |
3916 | { | 3916 | { |
3917 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3917 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3918 | unsigned length; | 3918 | unsigned length; |
3919 | unsigned blocksize; | 3919 | unsigned blocksize; |
3920 | struct inode *inode = mapping->host; | 3920 | struct inode *inode = mapping->host; |
3921 | 3921 | ||
3922 | blocksize = inode->i_sb->s_blocksize; | 3922 | blocksize = inode->i_sb->s_blocksize; |
3923 | length = blocksize - (offset & (blocksize - 1)); | 3923 | length = blocksize - (offset & (blocksize - 1)); |
3924 | 3924 | ||
3925 | return ext4_block_zero_page_range(handle, mapping, from, length); | 3925 | return ext4_block_zero_page_range(handle, mapping, from, length); |
3926 | } | 3926 | } |
3927 | 3927 | ||
/*
 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
 * starting from file offset 'from'.  The range to be zero'd must
 * be contained with in one block.  If the specified range exceeds
 * the end of the block it will be shortened to end of the block
 * that cooresponds to 'from'
 *
 * Returns 0 on success (including the benign "hole, nothing to do"
 * and "buffer already freed" cases), -EINVAL if the page cannot be
 * obtained, -EIO on a read error, or a journal error code.
 */
int ext4_block_zero_page_range(handle_t *handle,
		struct address_space *mapping, loff_t from, loff_t length)
{
	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, max, pos;
	ext4_lblk_t iblock;
	struct inode *inode = mapping->host;
	struct buffer_head *bh;
	struct page *page;
	int err = 0;

	/*
	 * Mask out __GFP_FS so the page allocation cannot recurse back
	 * into the filesystem while we hold the transaction handle.
	 */
	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
				   mapping_gfp_mask(mapping) & ~__GFP_FS);
	if (!page)
		return -EINVAL;

	blocksize = inode->i_sb->s_blocksize;
	/* max = bytes from 'offset' to the end of its fs block */
	max = blocksize - (offset & (blocksize - 1));

	/*
	 * correct length if it does not fall between
	 * 'from' and the end of the block
	 */
	if (length > max || length < 0)
		length = max;

	/* logical block number of the first block covered by this page */
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;
	if (buffer_freed(bh)) {
		BUFFER_TRACE(bh, "freed: skip");
		goto unlock;
	}

	if (!buffer_mapped(bh)) {
		BUFFER_TRACE(bh, "unmapped");
		/* read-only lookup (create == 0): map but never allocate */
		ext4_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh)) {
			BUFFER_TRACE(bh, "still unmapped");
			goto unlock;
		}
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		/*
		 * Must read the block in first: we are doing a partial
		 * (sub-block) zero, so the untouched bytes have to be
		 * preserved.  err is reset to 0 further down on success.
		 */
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	if (ext4_should_journal_data(inode)) {
		/* data=journal: the data block itself is journalled metadata */
		BUFFER_TRACE(bh, "get write access");
		err = ext4_journal_get_write_access(handle, bh);
		if (err)
			goto unlock;
	}

	zero_user(page, offset, length);

	BUFFER_TRACE(bh, "zeroed end of block");

	err = 0;
	if (ext4_should_journal_data(inode)) {
		err = ext4_handle_dirty_metadata(handle, inode, bh);
	} else {
		/* data=ordered: tie this data write to the transaction */
		if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
			err = ext4_jbd2_file_inode(handle, inode);
		mark_buffer_dirty(bh);
	}

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
4030 | 4030 | ||
4031 | /* | 4031 | /* |
4032 | * Probably it should be a library function... search for first non-zero word | 4032 | * Probably it should be a library function... search for first non-zero word |
4033 | * or memcmp with zero_page, whatever is better for particular architecture. | 4033 | * or memcmp with zero_page, whatever is better for particular architecture. |
4034 | * Linus? | 4034 | * Linus? |
4035 | */ | 4035 | */ |
4036 | static inline int all_zeroes(__le32 *p, __le32 *q) | 4036 | static inline int all_zeroes(__le32 *p, __le32 *q) |
4037 | { | 4037 | { |
4038 | while (p < q) | 4038 | while (p < q) |
4039 | if (*p++) | 4039 | if (*p++) |
4040 | return 0; | 4040 | return 0; |
4041 | return 1; | 4041 | return 1; |
4042 | } | 4042 | } |
4043 | 4043 | ||
/**
 * ext4_find_shared - find the indirect blocks for partial truncation.
 * @inode:	inode in question
 * @depth:	depth of the affected branch
 * @offsets:	offsets of pointers in that branch (see ext4_block_to_path)
 * @chain:	place to store the pointers to partial indirect blocks
 * @top:	place to the (detached) top of branch
 *
 * This is a helper function used by ext4_truncate().
 *
 * When we do truncate() we may have to clean the ends of several
 * indirect blocks but leave the blocks themselves alive. Block is
 * partially truncated if some data below the new i_size is referred
 * from it (and it is on the path to the first completely truncated
 * data block, indeed).  We have to free the top of that path along
 * with everything to the right of the path. Since no allocation
 * past the truncation point is possible until ext4_truncate()
 * finishes, we may safely do the latter, but top of branch may
 * require special attention - pageout below the truncation point
 * might try to populate it.
 *
 * We atomically detach the top of branch from the tree, store the
 * block number of its root in *@top, pointers to buffer_heads of
 * partially truncated blocks - in @chain[].bh and pointers to
 * their last elements that should not be removed - in
 * @chain[].p. Return value is the pointer to last filled element
 * of @chain.
 *
 * The work left to caller to do the actual freeing of subtrees:
 *	a) free the subtree starting from *@top
 *	b) free the subtrees whose roots are stored in
 *		(@chain[i].p+1 .. end of @chain[i].bh->b_data)
 *	c) free the subtrees growing from the inode past the @chain[0].
 *			(no partially truncated stuff there).  */

static Indirect *ext4_find_shared(struct inode *inode, int depth,
				  ext4_lblk_t offsets[4], Indirect chain[4],
				  __le32 *top)
{
	Indirect *partial, *p;
	int k, err;

	*top = 0;
	/* Make k index the deepest non-null offset + 1 */
	for (k = depth; k > 1 && !offsets[k-1]; k--)
		;
	partial = ext4_get_branch(inode, k, offsets, chain, &err);
	/* Writer: pointers */
	if (!partial)
		/* full path exists - deepest lookup entry is the boundary */
		partial = chain + k-1;
	/*
	 * If the branch acquired continuation since we've looked at it -
	 * fine, it should all survive and (new) top doesn't belong to us.
	 */
	if (!partial->key && *partial->p)
		/* Writer: end */
		goto no_top;
	/*
	 * Walk back up the chain past levels whose pointers left of p->p
	 * are all zero - those indirect blocks hold nothing that must
	 * survive, so the boundary moves up.
	 */
	for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
		;
	/*
	 * OK, we've found the last block that must survive. The rest of our
	 * branch should be detached before unlocking. However, if that rest
	 * of branch is all ours and does not grow immediately from the inode
	 * it's easier to cheat and just decrement partial->p.
	 */
	if (p == chain + k - 1 && p > chain) {
		p->p--;
	} else {
		*top = *p->p;
		/* Nope, don't do this in ext4.  Must leave the tree intact */
#if 0
		*p->p = 0;
#endif
	}
	/* Writer: end */

	/* drop buffer references for levels below the chosen boundary */
	while (partial > p) {
		brelse(partial->bh);
		partial--;
	}
no_top:
	return partial;
}
4127 | 4127 | ||
/*
 * Zero a number of block pointers in either an inode or an indirect block.
 * If we restart the transaction we must again get write access to the
 * indirect block for further modification.
 *
 * We release `count' blocks on disk, but (last - first) may be greater
 * than `count' because there can be holes in there.
 *
 * Return 0 on success, 1 on invalid block range
 * and < 0 on fatal error.
 */
static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
			     struct buffer_head *bh,
			     ext4_fsblk_t block_to_free,
			     unsigned long count, __le32 *first,
			     __le32 *last)
{
	__le32 *p;
	int	flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
	int	err;

	/* directory/symlink blocks are metadata as far as the journal goes */
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		flags |= EXT4_FREE_BLOCKS_METADATA;

	/* refuse to free a range that lies outside the valid data area */
	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
				   count)) {
		EXT4_ERROR_INODE(inode, "attempt to clear invalid "
				 "blocks %llu len %lu",
				 (unsigned long long) block_to_free, count);
		return 1;
	}

	/*
	 * If the handle is nearly out of credits, flush what we have
	 * dirtied so far and restart the transaction.  Any journalled
	 * state (bh write access) must be re-established afterwards.
	 */
	if (try_to_extend_transaction(handle, inode)) {
		if (bh) {
			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
			err = ext4_handle_dirty_metadata(handle, inode, bh);
			if (unlikely(err))
				goto out_err;
		}
		err = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(err))
			goto out_err;
		err = ext4_truncate_restart_trans(handle, inode,
						  blocks_for_truncate(inode));
		if (unlikely(err))
			goto out_err;
		if (bh) {
			/* write access does not survive a restart */
			BUFFER_TRACE(bh, "retaking write access");
			err = ext4_journal_get_write_access(handle, bh);
			if (unlikely(err))
				goto out_err;
		}
	}

	/* clear the on-disk pointers (holes included) before freeing */
	for (p = first; p < last; p++)
		*p = 0;

	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
	return 0;
out_err:
	ext4_std_error(inode->i_sb, err);
	return err;
}
4191 | 4191 | ||
/**
 * ext4_free_data - free a list of data blocks
 * @handle:	handle for this transaction
 * @inode:	inode we are dealing with
 * @this_bh:	indirect buffer_head which contains *@first and *@last
 * @first:	array of block numbers
 * @last:	points immediately past the end of array
 *
 * We are freeing all blocks referred from that array (numbers are stored as
 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
 *
 * We accumulate contiguous runs of blocks to free.  Conveniently, if these
 * blocks are contiguous then releasing them at one time will only affect one
 * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
 * actually use a lot of journal space.
 *
 * @this_bh will be %NULL if @first and @last point into the inode's direct
 * block pointers.
 */
static void ext4_free_data(handle_t *handle, struct inode *inode,
			   struct buffer_head *this_bh,
			   __le32 *first, __le32 *last)
{
	ext4_fsblk_t block_to_free = 0;    /* Starting block # of a run */
	unsigned long count = 0;	    /* Number of blocks in the run */
	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
					       corresponding to
					       block_to_free */
	ext4_fsblk_t nr;		    /* Current block # */
	__le32 *p;			    /* Pointer into inode/ind
					       for current block */
	int err = 0;

	if (this_bh) {				/* For indirect block */
		BUFFER_TRACE(this_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, this_bh);
		/* Important: if we can't update the indirect pointers
		 * to the blocks, we can't free them. */
		if (err)
			return;
	}

	/*
	 * Scan the pointer array, coalescing physically-contiguous blocks
	 * into a single run and flushing the run whenever contiguity
	 * breaks.  Zero entries (holes) simply end nothing - they are
	 * skipped without flushing.
	 */
	for (p = first; p < last; p++) {
		nr = le32_to_cpu(*p);
		if (nr) {
			/* accumulate blocks to free if they're contiguous */
			if (count == 0) {
				block_to_free = nr;
				block_to_free_p = p;
				count = 1;
			} else if (nr == block_to_free + count) {
				count++;
			} else {
				err = ext4_clear_blocks(handle, inode, this_bh,
						        block_to_free, count,
						        block_to_free_p, p);
				if (err)
					break;
				block_to_free = nr;
				block_to_free_p = p;
				count = 1;
			}
		}
	}

	/* flush the final pending run, if the scan completed cleanly */
	if (!err && count > 0)
		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
					count, block_to_free_p, p);
	if (err < 0)
		/* fatal error */
		return;

	if (this_bh) {
		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");

		/*
		 * The buffer head should have an attached journal head at this
		 * point. However, if the data is corrupted and an indirect
		 * block pointed to itself, it would have been detached when
		 * the block was cleared. Check for this instead of OOPSing.
		 */
		if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
			ext4_handle_dirty_metadata(handle, inode, this_bh);
		else
			EXT4_ERROR_INODE(inode,
					 "circular indirect block detected at "
					 "block %llu",
				(unsigned long long) this_bh->b_blocknr);
	}
}
4282 | 4282 | ||
4283 | /** | 4283 | /** |
4284 | * ext4_free_branches - free an array of branches | 4284 | * ext4_free_branches - free an array of branches |
4285 | * @handle: JBD handle for this transaction | 4285 | * @handle: JBD handle for this transaction |
4286 | * @inode: inode we are dealing with | 4286 | * @inode: inode we are dealing with |
4287 | * @parent_bh: the buffer_head which contains *@first and *@last | 4287 | * @parent_bh: the buffer_head which contains *@first and *@last |
4288 | * @first: array of block numbers | 4288 | * @first: array of block numbers |
4289 | * @last: pointer immediately past the end of array | 4289 | * @last: pointer immediately past the end of array |
4290 | * @depth: depth of the branches to free | 4290 | * @depth: depth of the branches to free |
4291 | * | 4291 | * |
4292 | * We are freeing all blocks referred from these branches (numbers are | 4292 | * We are freeing all blocks referred from these branches (numbers are |
4293 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 4293 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
4294 | * appropriately. | 4294 | * appropriately. |
4295 | */ | 4295 | */ |
4296 | static void ext4_free_branches(handle_t *handle, struct inode *inode, | 4296 | static void ext4_free_branches(handle_t *handle, struct inode *inode, |
4297 | struct buffer_head *parent_bh, | 4297 | struct buffer_head *parent_bh, |
4298 | __le32 *first, __le32 *last, int depth) | 4298 | __le32 *first, __le32 *last, int depth) |
4299 | { | 4299 | { |
4300 | ext4_fsblk_t nr; | 4300 | ext4_fsblk_t nr; |
4301 | __le32 *p; | 4301 | __le32 *p; |
4302 | 4302 | ||
4303 | if (ext4_handle_is_aborted(handle)) | 4303 | if (ext4_handle_is_aborted(handle)) |
4304 | return; | 4304 | return; |
4305 | 4305 | ||
4306 | if (depth--) { | 4306 | if (depth--) { |
4307 | struct buffer_head *bh; | 4307 | struct buffer_head *bh; |
4308 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 4308 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
4309 | p = last; | 4309 | p = last; |
4310 | while (--p >= first) { | 4310 | while (--p >= first) { |
4311 | nr = le32_to_cpu(*p); | 4311 | nr = le32_to_cpu(*p); |
4312 | if (!nr) | 4312 | if (!nr) |
4313 | continue; /* A hole */ | 4313 | continue; /* A hole */ |
4314 | 4314 | ||
4315 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 4315 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
4316 | nr, 1)) { | 4316 | nr, 1)) { |
4317 | EXT4_ERROR_INODE(inode, | 4317 | EXT4_ERROR_INODE(inode, |
4318 | "invalid indirect mapped " | 4318 | "invalid indirect mapped " |
4319 | "block %lu (level %d)", | 4319 | "block %lu (level %d)", |
4320 | (unsigned long) nr, depth); | 4320 | (unsigned long) nr, depth); |
4321 | break; | 4321 | break; |
4322 | } | 4322 | } |
4323 | 4323 | ||
4324 | /* Go read the buffer for the next level down */ | 4324 | /* Go read the buffer for the next level down */ |
4325 | bh = sb_bread(inode->i_sb, nr); | 4325 | bh = sb_bread(inode->i_sb, nr); |
4326 | 4326 | ||
4327 | /* | 4327 | /* |
4328 | * A read failure? Report error and clear slot | 4328 | * A read failure? Report error and clear slot |
4329 | * (should be rare). | 4329 | * (should be rare). |
4330 | */ | 4330 | */ |
4331 | if (!bh) { | 4331 | if (!bh) { |
4332 | EXT4_ERROR_INODE_BLOCK(inode, nr, | 4332 | EXT4_ERROR_INODE_BLOCK(inode, nr, |
4333 | "Read failure"); | 4333 | "Read failure"); |
4334 | continue; | 4334 | continue; |
4335 | } | 4335 | } |
4336 | 4336 | ||
4337 | /* This zaps the entire block. Bottom up. */ | 4337 | /* This zaps the entire block. Bottom up. */ |
4338 | BUFFER_TRACE(bh, "free child branches"); | 4338 | BUFFER_TRACE(bh, "free child branches"); |
4339 | ext4_free_branches(handle, inode, bh, | 4339 | ext4_free_branches(handle, inode, bh, |
4340 | (__le32 *) bh->b_data, | 4340 | (__le32 *) bh->b_data, |
4341 | (__le32 *) bh->b_data + addr_per_block, | 4341 | (__le32 *) bh->b_data + addr_per_block, |
4342 | depth); | 4342 | depth); |
4343 | brelse(bh); | 4343 | brelse(bh); |
4344 | 4344 | ||
4345 | /* | 4345 | /* |
4346 | * Everything below this this pointer has been | 4346 | * Everything below this this pointer has been |
4347 | * released. Now let this top-of-subtree go. | 4347 | * released. Now let this top-of-subtree go. |
4348 | * | 4348 | * |
4349 | * We want the freeing of this indirect block to be | 4349 | * We want the freeing of this indirect block to be |
4350 | * atomic in the journal with the updating of the | 4350 | * atomic in the journal with the updating of the |
4351 | * bitmap block which owns it. So make some room in | 4351 | * bitmap block which owns it. So make some room in |
4352 | * the journal. | 4352 | * the journal. |
4353 | * | 4353 | * |
4354 | * We zero the parent pointer *after* freeing its | 4354 | * We zero the parent pointer *after* freeing its |
4355 | * pointee in the bitmaps, so if extend_transaction() | 4355 | * pointee in the bitmaps, so if extend_transaction() |
4356 | * for some reason fails to put the bitmap changes and | 4356 | * for some reason fails to put the bitmap changes and |
4357 | * the release into the same transaction, recovery | 4357 | * the release into the same transaction, recovery |
4358 | * will merely complain about releasing a free block, | 4358 | * will merely complain about releasing a free block, |
4359 | * rather than leaking blocks. | 4359 | * rather than leaking blocks. |
4360 | */ | 4360 | */ |
4361 | if (ext4_handle_is_aborted(handle)) | 4361 | if (ext4_handle_is_aborted(handle)) |
4362 | return; | 4362 | return; |
4363 | if (try_to_extend_transaction(handle, inode)) { | 4363 | if (try_to_extend_transaction(handle, inode)) { |
4364 | ext4_mark_inode_dirty(handle, inode); | 4364 | ext4_mark_inode_dirty(handle, inode); |
4365 | ext4_truncate_restart_trans(handle, inode, | 4365 | ext4_truncate_restart_trans(handle, inode, |
4366 | blocks_for_truncate(inode)); | 4366 | blocks_for_truncate(inode)); |
4367 | } | 4367 | } |
4368 | 4368 | ||
4369 | /* | 4369 | /* |
4370 | * The forget flag here is critical because if | 4370 | * The forget flag here is critical because if |
4371 | * we are journaling (and not doing data | 4371 | * we are journaling (and not doing data |
4372 | * journaling), we have to make sure a revoke | 4372 | * journaling), we have to make sure a revoke |
4373 | * record is written to prevent the journal | 4373 | * record is written to prevent the journal |
4374 | * replay from overwriting the (former) | 4374 | * replay from overwriting the (former) |
4375 | * indirect block if it gets reallocated as a | 4375 | * indirect block if it gets reallocated as a |
4376 | * data block. This must happen in the same | 4376 | * data block. This must happen in the same |
4377 | * transaction where the data blocks are | 4377 | * transaction where the data blocks are |
4378 | * actually freed. | 4378 | * actually freed. |
4379 | */ | 4379 | */ |
4380 | ext4_free_blocks(handle, inode, NULL, nr, 1, | 4380 | ext4_free_blocks(handle, inode, NULL, nr, 1, |
4381 | EXT4_FREE_BLOCKS_METADATA| | 4381 | EXT4_FREE_BLOCKS_METADATA| |
4382 | EXT4_FREE_BLOCKS_FORGET); | 4382 | EXT4_FREE_BLOCKS_FORGET); |
4383 | 4383 | ||
4384 | if (parent_bh) { | 4384 | if (parent_bh) { |
4385 | /* | 4385 | /* |
4386 | * The block which we have just freed is | 4386 | * The block which we have just freed is |
4387 | * pointed to by an indirect block: journal it | 4387 | * pointed to by an indirect block: journal it |
4388 | */ | 4388 | */ |
4389 | BUFFER_TRACE(parent_bh, "get_write_access"); | 4389 | BUFFER_TRACE(parent_bh, "get_write_access"); |
4390 | if (!ext4_journal_get_write_access(handle, | 4390 | if (!ext4_journal_get_write_access(handle, |
4391 | parent_bh)){ | 4391 | parent_bh)){ |
4392 | *p = 0; | 4392 | *p = 0; |
4393 | BUFFER_TRACE(parent_bh, | 4393 | BUFFER_TRACE(parent_bh, |
4394 | "call ext4_handle_dirty_metadata"); | 4394 | "call ext4_handle_dirty_metadata"); |
4395 | ext4_handle_dirty_metadata(handle, | 4395 | ext4_handle_dirty_metadata(handle, |
4396 | inode, | 4396 | inode, |
4397 | parent_bh); | 4397 | parent_bh); |
4398 | } | 4398 | } |
4399 | } | 4399 | } |
4400 | } | 4400 | } |
4401 | } else { | 4401 | } else { |
4402 | /* We have reached the bottom of the tree. */ | 4402 | /* We have reached the bottom of the tree. */ |
4403 | BUFFER_TRACE(parent_bh, "free data blocks"); | 4403 | BUFFER_TRACE(parent_bh, "free data blocks"); |
4404 | ext4_free_data(handle, inode, parent_bh, first, last); | 4404 | ext4_free_data(handle, inode, parent_bh, first, last); |
4405 | } | 4405 | } |
4406 | } | 4406 | } |
4407 | 4407 | ||
4408 | int ext4_can_truncate(struct inode *inode) | 4408 | int ext4_can_truncate(struct inode *inode) |
4409 | { | 4409 | { |
4410 | if (S_ISREG(inode->i_mode)) | 4410 | if (S_ISREG(inode->i_mode)) |
4411 | return 1; | 4411 | return 1; |
4412 | if (S_ISDIR(inode->i_mode)) | 4412 | if (S_ISDIR(inode->i_mode)) |
4413 | return 1; | 4413 | return 1; |
4414 | if (S_ISLNK(inode->i_mode)) | 4414 | if (S_ISLNK(inode->i_mode)) |
4415 | return !ext4_inode_is_fast_symlink(inode); | 4415 | return !ext4_inode_is_fast_symlink(inode); |
4416 | return 0; | 4416 | return 0; |
4417 | } | 4417 | } |
4418 | 4418 | ||
4419 | /* | 4419 | /* |
4420 | * ext4_punch_hole: punches a hole in a file by releaseing the blocks | 4420 | * ext4_punch_hole: punches a hole in a file by releaseing the blocks |
4421 | * associated with the given offset and length | 4421 | * associated with the given offset and length |
4422 | * | 4422 | * |
4423 | * @inode: File inode | 4423 | * @inode: File inode |
4424 | * @offset: The offset where the hole will begin | 4424 | * @offset: The offset where the hole will begin |
4425 | * @len: The length of the hole | 4425 | * @len: The length of the hole |
4426 | * | 4426 | * |
4427 | * Returns: 0 on sucess or negative on failure | 4427 | * Returns: 0 on sucess or negative on failure |
4428 | */ | 4428 | */ |
4429 | 4429 | ||
4430 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 4430 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) |
4431 | { | 4431 | { |
4432 | struct inode *inode = file->f_path.dentry->d_inode; | 4432 | struct inode *inode = file->f_path.dentry->d_inode; |
4433 | if (!S_ISREG(inode->i_mode)) | 4433 | if (!S_ISREG(inode->i_mode)) |
4434 | return -ENOTSUPP; | 4434 | return -ENOTSUPP; |
4435 | 4435 | ||
4436 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4436 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4437 | /* TODO: Add support for non extent hole punching */ | 4437 | /* TODO: Add support for non extent hole punching */ |
4438 | return -ENOTSUPP; | 4438 | return -ENOTSUPP; |
4439 | } | 4439 | } |
4440 | 4440 | ||
4441 | return ext4_ext_punch_hole(file, offset, length); | 4441 | return ext4_ext_punch_hole(file, offset, length); |
4442 | } | 4442 | } |
4443 | 4443 | ||
/*
 * ext4_truncate()
 *
 * We block out ext4_get_block() block instantiations across the entire
 * transaction, and VFS/VM ensures that ext4_truncate() cannot run
 * simultaneously on behalf of the same inode.
 *
 * As we work through the truncate and commit bits of it to the journal there
 * is one core, guiding principle: the file's tree must always be consistent on
 * disk.  We must be able to restart the truncate after a crash.
 *
 * The file's tree may be transiently inconsistent in memory (although it
 * probably isn't), but whenever we close off and commit a journal transaction,
 * the contents of (the filesystem + the journal) must be consistent and
 * restartable.  It's pretty simple, really: bottom up, right to left (although
 * left-to-right works OK too).
 *
 * Note that at recovery time, journal replay occurs *before* the restart of
 * truncate against the orphan inode list.
 *
 * The committed inode has the new, desired i_size (which is the same as
 * i_disksize in this case).  After a crash, ext4_orphan_cleanup() will see
 * that this inode's truncate did not complete and it will again call
 * ext4_truncate() to have another go.  So there will be instantiated blocks
 * to the right of the truncation point in a crashed ext4 filesystem.  But
 * that's fine - as long as they are linked from the inode, the post-crash
 * ext4_truncate() run will find them and release them.
 */
void ext4_truncate(struct inode *inode)
{
	handle_t *handle;
	struct ext4_inode_info *ei = EXT4_I(inode);
	__le32 *i_data = ei->i_data;
	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	ext4_lblk_t offsets[4];
	Indirect chain[4];
	Indirect *partial;
	__le32 nr = 0;
	int n = 0;
	ext4_lblk_t last_block, max_block;
	unsigned blocksize = inode->i_sb->s_blocksize;

	trace_ext4_truncate_enter(inode);

	if (!ext4_can_truncate(inode))
		return;

	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);

	/* Truncate-to-zero flags the inode so close() can discard any
	 * unused delayed-allocation reservation. */
	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);

	/* Extent-mapped inodes are handled entirely by the extents code. */
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ext4_ext_truncate(inode);
		trace_ext4_truncate_exit(inode);
		return;
	}

	handle = start_transaction(inode);
	if (IS_ERR(handle))
		return;		/* AKPM: return what? */

	/* First block past the new EOF, and the indirect-scheme limit. */
	last_block = (inode->i_size + blocksize-1)
					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);

	/* Zero the tail of a partially-truncated final block. */
	if (inode->i_size & (blocksize - 1))
		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
			goto out_stop;

	if (last_block != max_block) {
		n = ext4_block_to_path(inode, last_block, offsets, NULL);
		if (n == 0)
			goto out_stop;	/* error */
	}

	/*
	 * OK.  This truncate is going to happen.  We add the inode to the
	 * orphan list, so that if this truncate spans multiple transactions,
	 * and we crash, we will resume the truncate when the filesystem
	 * recovers.  It also marks the inode dirty, to catch the new size.
	 *
	 * Implication: the file must always be in a sane, consistent
	 * truncatable state while each transaction commits.
	 */
	if (ext4_orphan_add(handle, inode))
		goto out_stop;

	/*
	 * From here we block out all ext4_get_block() callers who want to
	 * modify the block allocation tree.
	 */
	down_write(&ei->i_data_sem);

	ext4_discard_preallocations(inode);

	/*
	 * The orphan list entry will now protect us from any crash which
	 * occurs before the truncate completes, so it is now safe to propagate
	 * the new, shorter inode size (held for now in i_size) into the
	 * on-disk inode. We do this via i_disksize, which is the value which
	 * ext4 *really* writes onto the disk inode.
	 */
	ei->i_disksize = inode->i_size;

	if (last_block == max_block) {
		/*
		 * It is unnecessary to free any data blocks if last_block is
		 * equal to the indirect block limit.
		 */
		goto out_unlock;
	} else if (n == 1) {		/* direct blocks */
		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
			       i_data + EXT4_NDIR_BLOCKS);
		goto do_indirects;
	}

	/* Find the innermost indirect block shared between the kept and
	 * the freed part of the tree; nr is its dangling subtree, if any. */
	partial = ext4_find_shared(inode, n, offsets, chain, &nr);
	/* Kill the top of shared branch (not detached) */
	if (nr) {
		if (partial == chain) {
			/* Shared branch grows from the inode */
			ext4_free_branches(handle, inode, NULL,
					   &nr, &nr+1, (chain+n-1) - partial);
			*partial->p = 0;
			/*
			 * We mark the inode dirty prior to restart,
			 * and prior to stop.  No need for it here.
			 */
		} else {
			/* Shared branch grows from an indirect block */
			BUFFER_TRACE(partial->bh, "get_write_access");
			ext4_free_branches(handle, inode, partial->bh,
					partial->p,
					partial->p+1, (chain+n-1) - partial);
		}
	}
	/* Clear the ends of indirect blocks on the shared branch */
	while (partial > chain) {
		ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
				   (__le32*)partial->bh->b_data+addr_per_block,
				   (chain+n-1) - partial);
		BUFFER_TRACE(partial->bh, "call brelse");
		brelse(partial->bh);
		partial--;
	}
do_indirects:
	/* Kill the remaining (whole) subtrees.  Each case deliberately
	 * falls through: starting deeper in the tree frees fewer levels. */
	switch (offsets[0]) {
	default:
		nr = i_data[EXT4_IND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
			i_data[EXT4_IND_BLOCK] = 0;
		}
		/* fall through */
	case EXT4_IND_BLOCK:
		nr = i_data[EXT4_DIND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
			i_data[EXT4_DIND_BLOCK] = 0;
		}
		/* fall through */
	case EXT4_DIND_BLOCK:
		nr = i_data[EXT4_TIND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
			i_data[EXT4_TIND_BLOCK] = 0;
		}
		/* fall through */
	case EXT4_TIND_BLOCK:
		;
	}

out_unlock:
	up_write(&ei->i_data_sem);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);

	/*
	 * In a multi-transaction truncate, we only make the final transaction
	 * synchronous
	 */
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);
out_stop:
	/*
	 * If this was a simple ftruncate(), and the file will remain alive
	 * then we need to clear up the orphan record which we created above.
	 * However, if this was a real unlink then we were called by
	 * ext4_delete_inode(), and we allow that function to clean up the
	 * orphan info for us.
	 */
	if (inode->i_nlink)
		ext4_orphan_del(handle, inode);

	ext4_journal_stop(handle);
	trace_ext4_truncate_exit(inode);
}
4642 | 4642 | ||
/*
 * ext4_get_inode_loc returns with an extra refcount against the inode's
 * underlying buffer_head on success. If 'in_mem' is true, we have all
 * data in memory that is needed to recreate the on-disk version of this
 * inode.
 *
 * @inode:  inode whose on-disk location is wanted
 * @iloc:   out - iloc->bh gets the buffer holding the raw inode and
 *          iloc->offset its byte offset within that buffer
 * @in_mem: non-zero when the in-core inode is a complete image of the
 *          on-disk one, enabling the skip-the-read optimisation below
 *
 * Returns 0 on success or -EIO on failure.
 */
static int __ext4_get_inode_loc(struct inode *inode,
				struct ext4_iloc *iloc, int in_mem)
{
	struct ext4_group_desc *gdp;
	struct buffer_head *bh;
	struct super_block *sb = inode->i_sb;
	ext4_fsblk_t block;
	int inodes_per_block, inode_offset;

	iloc->bh = NULL;
	if (!ext4_valid_inum(sb, inode->i_ino))
		return -EIO;

	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
	gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
	if (!gdp)
		return -EIO;

	/*
	 * Figure out the offset within the block group inode table
	 */
	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	inode_offset = ((inode->i_ino - 1) %
			EXT4_INODES_PER_GROUP(sb));
	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);

	/* Get (without reading) the buffer that maps the itable block. */
	bh = sb_getblk(sb, block);
	if (!bh) {
		EXT4_ERROR_INODE_BLOCK(inode, block,
				       "unable to read itable block");
		return -EIO;
	}
	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);

		/*
		 * If the buffer has the write error flag, we have failed
		 * to write out another inode in the same block.  In this
		 * case, we don't have to read the block because we may
		 * read the old inode data successfully.
		 */
		if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
			set_buffer_uptodate(bh);

		if (buffer_uptodate(bh)) {
			/* someone brought it uptodate while we waited */
			unlock_buffer(bh);
			goto has_buffer;
		}

		/*
		 * If we have all information of the inode in memory and this
		 * is the only valid inode in the block, we need not read the
		 * block.
		 */
		if (in_mem) {
			struct buffer_head *bitmap_bh;
			int i, start;

			/* First inode index of the itable block we hold. */
			start = inode_offset & ~(inodes_per_block - 1);

			/* Is the inode bitmap in cache? */
			bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
			if (!bitmap_bh)
				goto make_io;

			/*
			 * If the inode bitmap isn't in cache then the
			 * optimisation may end up performing two reads instead
			 * of one, so skip it.
			 */
			if (!buffer_uptodate(bitmap_bh)) {
				brelse(bitmap_bh);
				goto make_io;
			}
			/* If any sibling inode in this block is in use we
			 * must read the block after all. */
			for (i = start; i < start + inodes_per_block; i++) {
				if (i == inode_offset)
					continue;
				if (ext4_test_bit(i, bitmap_bh->b_data))
					break;
			}
			brelse(bitmap_bh);
			if (i == start + inodes_per_block) {
				/* all other inodes are free, so skip I/O */
				memset(bh->b_data, 0, bh->b_size);
				set_buffer_uptodate(bh);
				unlock_buffer(bh);
				goto has_buffer;
			}
		}

make_io:
		/*
		 * If we need to do any I/O, try to pre-readahead extra
		 * blocks from the inode table.
		 */
		if (EXT4_SB(sb)->s_inode_readahead_blks) {
			ext4_fsblk_t b, end, table;
			unsigned num;

			table = ext4_inode_table(sb, gdp);
			/* s_inode_readahead_blks is always a power of 2 */
			b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
			if (table > b)
				b = table;
			end = b + EXT4_SB(sb)->s_inode_readahead_blks;
			num = EXT4_INODES_PER_GROUP(sb);
			if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
				num -= ext4_itable_unused_count(sb, gdp);
			table += num / inodes_per_block;
			/* Don't read ahead past the in-use part of the table. */
			if (end > table)
				end = table;
			while (b <= end)
				sb_breadahead(sb, b++);
		}

		/*
		 * There are other valid inodes in the buffer, this inode
		 * has in-inode xattrs, or we don't have this inode in memory.
		 * Read the block from disk.
		 */
		trace_ext4_load_inode(inode);
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ_META, bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			EXT4_ERROR_INODE_BLOCK(inode, block,
					       "unable to read itable block");
			brelse(bh);
			return -EIO;
		}
	}
has_buffer:
	iloc->bh = bh;
	return 0;
}
4788 | 4788 | ||
4789 | int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | 4789 | int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) |
4790 | { | 4790 | { |
4791 | /* We have all inode data except xattrs in memory here. */ | 4791 | /* We have all inode data except xattrs in memory here. */ |
4792 | return __ext4_get_inode_loc(inode, iloc, | 4792 | return __ext4_get_inode_loc(inode, iloc, |
4793 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); | 4793 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); |
4794 | } | 4794 | } |
4795 | 4795 | ||
4796 | void ext4_set_inode_flags(struct inode *inode) | 4796 | void ext4_set_inode_flags(struct inode *inode) |
4797 | { | 4797 | { |
4798 | unsigned int flags = EXT4_I(inode)->i_flags; | 4798 | unsigned int flags = EXT4_I(inode)->i_flags; |
4799 | 4799 | ||
4800 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); | 4800 | inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); |
4801 | if (flags & EXT4_SYNC_FL) | 4801 | if (flags & EXT4_SYNC_FL) |
4802 | inode->i_flags |= S_SYNC; | 4802 | inode->i_flags |= S_SYNC; |
4803 | if (flags & EXT4_APPEND_FL) | 4803 | if (flags & EXT4_APPEND_FL) |
4804 | inode->i_flags |= S_APPEND; | 4804 | inode->i_flags |= S_APPEND; |
4805 | if (flags & EXT4_IMMUTABLE_FL) | 4805 | if (flags & EXT4_IMMUTABLE_FL) |
4806 | inode->i_flags |= S_IMMUTABLE; | 4806 | inode->i_flags |= S_IMMUTABLE; |
4807 | if (flags & EXT4_NOATIME_FL) | 4807 | if (flags & EXT4_NOATIME_FL) |
4808 | inode->i_flags |= S_NOATIME; | 4808 | inode->i_flags |= S_NOATIME; |
4809 | if (flags & EXT4_DIRSYNC_FL) | 4809 | if (flags & EXT4_DIRSYNC_FL) |
4810 | inode->i_flags |= S_DIRSYNC; | 4810 | inode->i_flags |= S_DIRSYNC; |
4811 | } | 4811 | } |
4812 | 4812 | ||
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
	unsigned int vfs_fl;
	unsigned long old_fl, new_fl;

	/*
	 * Recompute the five VFS-mirrored bits of ei->i_flags from
	 * vfs_inode.i_flags without locking: snapshot both words, build
	 * the new value, and retry via cmpxchg() if a concurrent writer
	 * changed ei->i_flags between the load and the store.
	 */
	do {
		vfs_fl = ei->vfs_inode.i_flags;
		old_fl = ei->i_flags;
		/* Clear only the mirrored bits; keep all other flags. */
		new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
				EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
				EXT4_DIRSYNC_FL);
		if (vfs_fl & S_SYNC)
			new_fl |= EXT4_SYNC_FL;
		if (vfs_fl & S_APPEND)
			new_fl |= EXT4_APPEND_FL;
		if (vfs_fl & S_IMMUTABLE)
			new_fl |= EXT4_IMMUTABLE_FL;
		if (vfs_fl & S_NOATIME)
			new_fl |= EXT4_NOATIME_FL;
		if (vfs_fl & S_DIRSYNC)
			new_fl |= EXT4_DIRSYNC_FL;
	} while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
}
4837 | 4837 | ||
4838 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | 4838 | static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, |
4839 | struct ext4_inode_info *ei) | 4839 | struct ext4_inode_info *ei) |
4840 | { | 4840 | { |
4841 | blkcnt_t i_blocks ; | 4841 | blkcnt_t i_blocks ; |
4842 | struct inode *inode = &(ei->vfs_inode); | 4842 | struct inode *inode = &(ei->vfs_inode); |
4843 | struct super_block *sb = inode->i_sb; | 4843 | struct super_block *sb = inode->i_sb; |
4844 | 4844 | ||
4845 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 4845 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
4846 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 4846 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { |
4847 | /* we are using combined 48 bit field */ | 4847 | /* we are using combined 48 bit field */ |
4848 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | 4848 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | |
4849 | le32_to_cpu(raw_inode->i_blocks_lo); | 4849 | le32_to_cpu(raw_inode->i_blocks_lo); |
4850 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { | 4850 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { |
4851 | /* i_blocks represent file system block size */ | 4851 | /* i_blocks represent file system block size */ |
4852 | return i_blocks << (inode->i_blkbits - 9); | 4852 | return i_blocks << (inode->i_blkbits - 9); |
4853 | } else { | 4853 | } else { |
4854 | return i_blocks; | 4854 | return i_blocks; |
4855 | } | 4855 | } |
4856 | } else { | 4856 | } else { |
4857 | return le32_to_cpu(raw_inode->i_blocks_lo); | 4857 | return le32_to_cpu(raw_inode->i_blocks_lo); |
4858 | } | 4858 | } |
4859 | } | 4859 | } |
4860 | 4860 | ||
/*
 * ext4_iget - obtain the in-core inode for @ino on @sb.
 *
 * Returns the cached inode if one exists; otherwise reads the raw
 * on-disk inode, validates it, and populates a fresh VFS inode.
 * On failure returns an ERR_PTR():
 *   -ENOMEM  iget_locked() could not allocate an inode
 *   -ESTALE  the inode is deleted (nlink == 0 outside orphan recovery)
 *   -EIO     on-disk corruption: bad extra_isize, bad xattr block,
 *            invalid extent tree / block references, or bogus i_mode
 */
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
	struct ext4_iloc iloc;
	struct ext4_inode *raw_inode;
	struct ext4_inode_info *ei;
	struct inode *inode;
	journal_t *journal = EXT4_SB(sb)->s_journal;
	long ret;
	int block;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;	/* already cached and fully initialised */

	ei = EXT4_I(inode);
	iloc.bh = NULL;

	/* in_mem == 0: nothing is cached yet, force a real read. */
	ret = __ext4_get_inode_loc(inode, &iloc, 0);
	if (ret < 0)
		goto bad_inode;
	raw_inode = ext4_raw_inode(&iloc);
	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		/* 32-bit ids enabled: fold in the high 16 bits too. */
		inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
		inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
	}
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);

	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
	ei->i_dir_start_lookup = 0;
	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
	/* We now have enough fields to check if the inode was active or not.
	 * This is needed because nfsd might try to access dead inodes
	 * the test is that same one that e2fsck uses
	 * NeilBrown 1999oct15
	 */
	if (inode->i_nlink == 0) {
		if (inode->i_mode == 0 ||
		    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
			/* this inode is deleted */
			ret = -ESTALE;
			goto bad_inode;
		}
		/* The only unlinked inodes we let through here have
		 * valid i_mode and are being read by the orphan
		 * recovery code: that's fine, we're about to complete
		 * the process of deleting those. */
	}
	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
	inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
		ei->i_file_acl |=
			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
	inode->i_size = ext4_isize(raw_inode);
	ei->i_disksize = inode->i_size;
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
#endif
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
	ei->i_block_group = iloc.block_group;
	ei->i_last_alloc_group = ~0;
	/*
	 * NOTE! The in-memory inode i_data array is in little-endian order
	 * even on big-endian machines: we do NOT byteswap the block numbers!
	 */
	for (block = 0; block < EXT4_N_BLOCKS; block++)
		ei->i_data[block] = raw_inode->i_block[block];
	INIT_LIST_HEAD(&ei->i_orphan);

	/*
	 * Set transaction id's of transactions that have to be committed
	 * to finish f[data]sync. We set them to currently running transaction
	 * as we cannot be sure that the inode or some of its metadata isn't
	 * part of the transaction - the inode could have been reclaimed and
	 * now it is reread from disk.
	 */
	if (journal) {
		transaction_t *transaction;
		tid_t tid;

		read_lock(&journal->j_state_lock);
		if (journal->j_running_transaction)
			transaction = journal->j_running_transaction;
		else
			transaction = journal->j_committing_transaction;
		if (transaction)
			tid = transaction->t_tid;
		else
			tid = journal->j_commit_sequence;
		read_unlock(&journal->j_state_lock);
		ei->i_sync_tid = tid;
		ei->i_datasync_tid = tid;
	}

	/* Large inodes: validate and decode the extra (beyond 128-byte) area. */
	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
		    EXT4_INODE_SIZE(inode->i_sb)) {
			ret = -EIO;
			goto bad_inode;
		}
		if (ei->i_extra_isize == 0) {
			/* The extra space is currently unused. Use it. */
			ei->i_extra_isize = sizeof(struct ext4_inode) -
					    EXT4_GOOD_OLD_INODE_SIZE;
		} else {
			/* In-inode xattrs start right after the extra area. */
			__le32 *magic = (void *)raw_inode +
					EXT4_GOOD_OLD_INODE_SIZE +
					ei->i_extra_isize;
			if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
				ext4_set_inode_state(inode, EXT4_STATE_XATTR);
		}
	} else
		ei->i_extra_isize = 0;

	EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
	EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
	EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
	EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);

	inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
		if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
			inode->i_version |=
			(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
	}

	/* Sanity-check the block mapping before exposing the inode. */
	ret = 0;
	if (ei->i_file_acl &&
	    !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
		EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
				 ei->i_file_acl);
		ret = -EIO;
		goto bad_inode;
	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
		    (S_ISLNK(inode->i_mode) &&
		     !ext4_inode_is_fast_symlink(inode)))
			/* Validate extent which is part of inode */
			ret = ext4_ext_check_inode(inode);
	} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
		   (S_ISLNK(inode->i_mode) &&
		    !ext4_inode_is_fast_symlink(inode))) {
		/* Validate block references which are part of inode */
		ret = ext4_check_inode_blockref(inode);
	}
	if (ret)
		goto bad_inode;

	/* Wire up i_op/i_fop/aops according to the file type. */
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext4_file_inode_operations;
		inode->i_fop = &ext4_file_operations;
		ext4_set_aops(inode);
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext4_dir_inode_operations;
		inode->i_fop = &ext4_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		if (ext4_inode_is_fast_symlink(inode)) {
			/* Target lives in i_data itself; NUL-terminate it. */
			inode->i_op = &ext4_fast_symlink_inode_operations;
			nd_terminate_link(ei->i_data, inode->i_size,
				sizeof(ei->i_data) - 1);
		} else {
			inode->i_op = &ext4_symlink_inode_operations;
			ext4_set_aops(inode);
		}
	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
	      S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		inode->i_op = &ext4_special_inode_operations;
		/* Old devices are encoded in i_block[0], new in i_block[1]. */
		if (raw_inode->i_block[0])
			init_special_inode(inode, inode->i_mode,
			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
		else
			init_special_inode(inode, inode->i_mode,
			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
	} else {
		ret = -EIO;
		EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
		goto bad_inode;
	}
	brelse(iloc.bh);
	ext4_set_inode_flags(inode);
	unlock_new_inode(inode);
	return inode;

bad_inode:
	/* brelse(NULL) is safe if we failed before reading the iloc. */
	brelse(iloc.bh);
	iget_failed(inode);
	return ERR_PTR(ret);
}
5055 | 5055 | ||
/*
 * ext4_inode_blocks_set - encode inode->i_blocks into the raw inode.
 *
 * inode->i_blocks counts 512-byte sectors.  Store it in the narrowest
 * on-disk representation that fits:
 *   - 32 bits of sectors (HUGE_FILE inode flag cleared);
 *   - 48 bits of sectors, needs the huge_file RO-compat feature;
 *   - 48 bits of filesystem blocks, sets EXT4_INODE_HUGE_FILE.
 * Returns 0 on success, -EFBIG if the count cannot be represented.
 */
static int ext4_inode_blocks_set(handle_t *handle,
				struct ext4_inode *raw_inode,
				struct ext4_inode_info *ei)
{
	struct inode *inode = &(ei->vfs_inode);
	u64 i_blocks = inode->i_blocks;
	struct super_block *sb = inode->i_sb;

	if (i_blocks <= ~0U) {
		/*
		 * i_blocks can be represented in a 32 bit variable
		 * as multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = 0;
		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
		return 0;
	}
	/* Anything wider than 32 bits requires the huge_file feature. */
	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
		return -EFBIG;

	if (i_blocks <= 0xffffffffffffULL) {
		/*
		 * i_blocks can be represented in a 48 bit variable
		 * as multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
	} else {
		ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
		/* i_block is stored in file system block size */
		i_blocks = i_blocks >> (inode->i_blkbits - 9);
		raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
	}
	return 0;
}
5094 | 5094 | ||
/*
 * Post the struct inode info into an on-disk inode location in the
 * buffer-cache. This gobbles the caller's reference to the
 * buffer_head in the inode location struct.
 *
 * The caller must have write access to iloc->bh.
 */
static int ext4_do_update_inode(handle_t *handle,
				struct inode *inode,
				struct ext4_iloc *iloc)
{
	struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct buffer_head *bh = iloc->bh;
	/* err holds the first failure; rc collects the final dirty call. */
	int err = 0, rc, block;

	/* For fields not tracked in the in-memory inode,
	 * initialise them to zero for new inodes. */
	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);

	/* Sync the mirrored VFS flag bits back into ei->i_flags first. */
	ext4_get_inode_flags(ei);
	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
/*
 * Fix up interoperability with old kernels. Otherwise, old inodes get
 * re-used with the upper 16 bits of the uid/gid intact
 */
		if (!ei->i_dtime) {
			raw_inode->i_uid_high =
				cpu_to_le16(high_16_bits(inode->i_uid));
			raw_inode->i_gid_high =
				cpu_to_le16(high_16_bits(inode->i_gid));
		} else {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		}
	} else {
		raw_inode->i_uid_low =
			cpu_to_le16(fs_high2lowuid(inode->i_uid));
		raw_inode->i_gid_low =
			cpu_to_le16(fs_high2lowgid(inode->i_gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);

	EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);

	/* Fails with -EFBIG when i_blocks cannot be represented on disk. */
	if (ext4_inode_blocks_set(handle, raw_inode, ei))
		goto out_brelse;
	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
	    cpu_to_le32(EXT4_OS_HURD))
		raw_inode->i_file_acl_high =
			cpu_to_le16(ei->i_file_acl >> 32);
	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
	ext4_isize_set(raw_inode, ei->i_disksize);
	if (ei->i_disksize > 0x7fffffffULL) {
		struct super_block *sb = inode->i_sb;
		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
				EXT4_SB(sb)->s_es->s_rev_level ==
				cpu_to_le32(EXT4_GOOD_OLD_REV)) {
			/* If this is the first large file
			 * created, add a flag to the superblock.
			 */
			err = ext4_journal_get_write_access(handle,
					EXT4_SB(sb)->s_sbh);
			if (err)
				goto out_brelse;
			ext4_update_dynamic_rev(sb);
			EXT4_SET_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
			sb->s_dirt = 1;
			ext4_handle_sync(handle);
			err = ext4_handle_dirty_metadata(handle, NULL,
					EXT4_SB(sb)->s_sbh);
		}
	}
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		/* Old device numbers go in i_block[0], new in i_block[1]. */
		if (old_valid_dev(inode->i_rdev)) {
			raw_inode->i_block[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			raw_inode->i_block[1] = 0;
		} else {
			raw_inode->i_block[0] = 0;
			raw_inode->i_block[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			raw_inode->i_block[2] = 0;
		}
	} else
		for (block = 0; block < EXT4_N_BLOCKS; block++)
			raw_inode->i_block[block] = ei->i_data[block];

	raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
	if (ei->i_extra_isize) {
		if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
			raw_inode->i_version_hi =
			cpu_to_le32(inode->i_version >> 32);
		raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
	}

	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	rc = ext4_handle_dirty_metadata(handle, NULL, bh);
	if (!err)
		err = rc;	/* preserve any earlier failure */
	ext4_clear_inode_state(inode, EXT4_STATE_NEW);

	ext4_update_inode_fsync_trans(handle, inode, 0);
out_brelse:
	brelse(bh);
	ext4_std_error(inode->i_sb, err);
	return err;
}
5217 | 5217 | ||
/*
 * ext4_write_inode()
 *
 * We are called from a few places:
 *
 * - Within generic_file_write() for O_SYNC files.
 *   Here, there will be no transaction running. We wait for any running
 *   transaction to commit.
 *
 * - Within sys_sync(), kupdate and such.
 *   We wait on commit, if told to.
 *
 * - Within prune_icache() (PF_MEMALLOC == true)
 *   Here we simply return.  We can't afford to block kswapd on the
 *   journal commit.
 *
 * In all cases it is actually safe for us to return without doing anything,
 * because the inode has been copied into a raw inode buffer in
 * ext4_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
 * knfsd.
 *
 * Note that we are absolutely dependent upon all inode dirtiers doing the
 * right thing: they *must* call mark_inode_dirty() after dirtying info in
 * which we are interested.
 *
 * It would be a bug for them to not do this.  The code:
 *
 *	mark_inode_dirty(inode)
 *	stuff();
 *	inode->i_size = expr;
 *
 * is in error because a kswapd-driven write_inode() could occur while
 * `stuff()' is running, and the new i_size will be lost.  Plus the inode
 * will no longer be on the superblock's dirty inode list.
 */
int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int err;

	/* Never block memory reclaim on a journal commit. */
	if (current->flags & PF_MEMALLOC)
		return 0;

	if (EXT4_SB(inode->i_sb)->s_journal) {
		/*
		 * A live handle here means we were called re-entrantly from
		 * within a transaction, which would deadlock on commit.
		 */
		if (ext4_journal_current_handle()) {
			jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
			dump_stack();
			return -EIO;
		}

		if (wbc->sync_mode != WB_SYNC_ALL)
			return 0;

		err = ext4_force_commit(inode->i_sb);
	} else {
		/* No journal: push the raw inode buffer out directly. */
		struct ext4_iloc iloc;

		err = __ext4_get_inode_loc(inode, &iloc, 0);
		if (err)
			return err;
		if (wbc->sync_mode == WB_SYNC_ALL)
			sync_dirty_buffer(iloc.bh);
		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
			EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
					       "IO error syncing inode");
			err = -EIO;
		}
		brelse(iloc.bh);
	}
	return err;
}
5288 | 5288 | ||
/*
 * ext4_setattr()
 *
 * Called from notify_change.
 *
 * We want to trap VFS attempts to truncate the file as soon as
 * possible.  In particular, we want to make sure that when the VFS
 * shrinks i_size, we put the inode on the orphan list and modify
 * i_disksize immediately, so that during the subsequent flushing of
 * dirty pages and freeing of disk blocks, we can guarantee that any
 * commit will leave the blocks being flushed in an unused state on
 * disk.  (On recovery, the inode will get truncated and the blocks will
 * be freed, so we have a strong guarantee that no future commit will
 * leave these blocks visible to the user.)
 *
 * Another thing we have to assure is that if we are in ordered mode
 * and inode is still attached to the committing transaction, we must
 * start writeout of all the dirty pages which are being truncated.
 * This way we are sure that all the data written in the previous
 * transaction are already on disk (truncate waits for pages under
 * writeback).
 *
 * Called with inode->i_mutex down.
 */
int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error, rc = 0;
	int orphan = 0;	/* nonzero once the inode is on the orphan list */
	const unsigned int ia_valid = attr->ia_valid;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if (is_quota_modification(inode, attr))
		dquot_initialize(inode);
	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
		handle_t *handle;

		/* (user+group)*(old+new) structure, inode write (sb,
		 * inode block, ? - but truncate inode update has it) */
		handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
					EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}
		error = dquot_transfer(inode, attr);
		if (error) {
			ext4_journal_stop(handle);
			return error;
		}
		/* Update corresponding info in inode so that everything is in
		 * one transaction */
		if (attr->ia_valid & ATTR_UID)
			inode->i_uid = attr->ia_uid;
		if (attr->ia_valid & ATTR_GID)
			inode->i_gid = attr->ia_gid;
		error = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	}

	if (attr->ia_valid & ATTR_SIZE) {
		/*
		 * Wait for all outstanding direct I/O before changing the
		 * size, so no dio can reference blocks we are about to free.
		 */
		inode_dio_wait(inode);

		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

			/* Indirect-mapped files have a smaller size limit. */
			if (attr->ia_size > sbi->s_bitmap_maxbytes)
				return -EFBIG;
		}
	}

	if (S_ISREG(inode->i_mode) &&
	    attr->ia_valid & ATTR_SIZE &&
	    (attr->ia_size < inode->i_size)) {
		handle_t *handle;

		handle = ext4_journal_start(inode, 3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}
		if (ext4_handle_valid(handle)) {
			/*
			 * Put the inode on the orphan list so recovery can
			 * finish the truncate after a crash.
			 */
			error = ext4_orphan_add(handle, inode);
			orphan = 1;
		}
		EXT4_I(inode)->i_disksize = attr->ia_size;
		rc = ext4_mark_inode_dirty(handle, inode);
		if (!error)
			error = rc;
		ext4_journal_stop(handle);

		if (ext4_should_order_data(inode)) {
			error = ext4_begin_ordered_truncate(inode,
							    attr->ia_size);
			if (error) {
				/* Do as much error cleanup as possible */
				handle = ext4_journal_start(inode, 3);
				if (IS_ERR(handle)) {
					ext4_orphan_del(NULL, inode);
					goto err_out;
				}
				ext4_orphan_del(handle, inode);
				orphan = 0;
				ext4_journal_stop(handle);
				goto err_out;
			}
		}
	}

	if (attr->ia_valid & ATTR_SIZE) {
		if (attr->ia_size != i_size_read(inode)) {
			truncate_setsize(inode, attr->ia_size);
			ext4_truncate(inode);
		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
			/* Same size: still drop any preallocation past EOF. */
			ext4_truncate(inode);
	}

	if (!rc) {
		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
	}

	/*
	 * If the call to ext4_truncate failed to get a transaction handle at
	 * all, we need to clean up the in-core orphan list manually.
	 */
	if (orphan && inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	if (!rc && (ia_valid & ATTR_MODE))
		rc = ext4_acl_chmod(inode);

err_out:
	ext4_std_error(inode->i_sb, error);
	if (!error)
		error = rc;
	return error;
}
5429 | 5431 | ||
5430 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 5432 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
5431 | struct kstat *stat) | 5433 | struct kstat *stat) |
5432 | { | 5434 | { |
5433 | struct inode *inode; | 5435 | struct inode *inode; |
5434 | unsigned long delalloc_blocks; | 5436 | unsigned long delalloc_blocks; |
5435 | 5437 | ||
5436 | inode = dentry->d_inode; | 5438 | inode = dentry->d_inode; |
5437 | generic_fillattr(inode, stat); | 5439 | generic_fillattr(inode, stat); |
5438 | 5440 | ||
5439 | /* | 5441 | /* |
5440 | * We can't update i_blocks if the block allocation is delayed | 5442 | * We can't update i_blocks if the block allocation is delayed |
5441 | * otherwise in the case of system crash before the real block | 5443 | * otherwise in the case of system crash before the real block |
5442 | * allocation is done, we will have i_blocks inconsistent with | 5444 | * allocation is done, we will have i_blocks inconsistent with |
5443 | * on-disk file blocks. | 5445 | * on-disk file blocks. |
5444 | * We always keep i_blocks updated together with real | 5446 | * We always keep i_blocks updated together with real |
5445 | * allocation. But to not confuse with user, stat | 5447 | * allocation. But to not confuse with user, stat |
5446 | * will return the blocks that include the delayed allocation | 5448 | * will return the blocks that include the delayed allocation |
5447 | * blocks for this file. | 5449 | * blocks for this file. |
5448 | */ | 5450 | */ |
5449 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 5451 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
5450 | 5452 | ||
5451 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 5453 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
5452 | return 0; | 5454 | return 0; |
5453 | } | 5455 | } |
5454 | 5456 | ||
5455 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | 5457 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, |
5456 | int chunk) | 5458 | int chunk) |
5457 | { | 5459 | { |
5458 | int indirects; | 5460 | int indirects; |
5459 | 5461 | ||
5460 | /* if nrblocks are contiguous */ | 5462 | /* if nrblocks are contiguous */ |
5461 | if (chunk) { | 5463 | if (chunk) { |
5462 | /* | 5464 | /* |
5463 | * With N contiguous data blocks, we need at most | 5465 | * With N contiguous data blocks, we need at most |
5464 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | 5466 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
5465 | * 2 dindirect blocks, and 1 tindirect block | 5467 | * 2 dindirect blocks, and 1 tindirect block |
5466 | */ | 5468 | */ |
5467 | return DIV_ROUND_UP(nrblocks, | 5469 | return DIV_ROUND_UP(nrblocks, |
5468 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | 5470 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
5469 | } | 5471 | } |
5470 | /* | 5472 | /* |
5471 | * if nrblocks are not contiguous, worse case, each block touch | 5473 | * if nrblocks are not contiguous, worse case, each block touch |
5472 | * a indirect block, and each indirect block touch a double indirect | 5474 | * a indirect block, and each indirect block touch a double indirect |
5473 | * block, plus a triple indirect block | 5475 | * block, plus a triple indirect block |
5474 | */ | 5476 | */ |
5475 | indirects = nrblocks * 2 + 1; | 5477 | indirects = nrblocks * 2 + 1; |
5476 | return indirects; | 5478 | return indirects; |
5477 | } | 5479 | } |
5478 | 5480 | ||
5479 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5481 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5480 | { | 5482 | { |
5481 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 5483 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5482 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); | 5484 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); |
5483 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 5485 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); |
5484 | } | 5486 | } |
5485 | 5487 | ||
5486 | /* | 5488 | /* |
5487 | * Account for index blocks, block groups bitmaps and block group | 5489 | * Account for index blocks, block groups bitmaps and block group |
5488 | * descriptor blocks if modify datablocks and index blocks | 5490 | * descriptor blocks if modify datablocks and index blocks |
5489 | * worse case, the indexs blocks spread over different block groups | 5491 | * worse case, the indexs blocks spread over different block groups |
5490 | * | 5492 | * |
5491 | * If datablocks are discontiguous, they are possible to spread over | 5493 | * If datablocks are discontiguous, they are possible to spread over |
5492 | * different block groups too. If they are contiuguous, with flexbg, | 5494 | * different block groups too. If they are contiuguous, with flexbg, |
5493 | * they could still across block group boundary. | 5495 | * they could still across block group boundary. |
5494 | * | 5496 | * |
5495 | * Also account for superblock, inode, quota and xattr blocks | 5497 | * Also account for superblock, inode, quota and xattr blocks |
5496 | */ | 5498 | */ |
5497 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5499 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5498 | { | 5500 | { |
5499 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5501 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5500 | int gdpblocks; | 5502 | int gdpblocks; |
5501 | int idxblocks; | 5503 | int idxblocks; |
5502 | int ret = 0; | 5504 | int ret = 0; |
5503 | 5505 | ||
5504 | /* | 5506 | /* |
5505 | * How many index blocks need to touch to modify nrblocks? | 5507 | * How many index blocks need to touch to modify nrblocks? |
5506 | * The "Chunk" flag indicating whether the nrblocks is | 5508 | * The "Chunk" flag indicating whether the nrblocks is |
5507 | * physically contiguous on disk | 5509 | * physically contiguous on disk |
5508 | * | 5510 | * |
5509 | * For Direct IO and fallocate, they calls get_block to allocate | 5511 | * For Direct IO and fallocate, they calls get_block to allocate |
5510 | * one single extent at a time, so they could set the "Chunk" flag | 5512 | * one single extent at a time, so they could set the "Chunk" flag |
5511 | */ | 5513 | */ |
5512 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | 5514 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); |
5513 | 5515 | ||
5514 | ret = idxblocks; | 5516 | ret = idxblocks; |
5515 | 5517 | ||
5516 | /* | 5518 | /* |
5517 | * Now let's see how many group bitmaps and group descriptors need | 5519 | * Now let's see how many group bitmaps and group descriptors need |
5518 | * to account | 5520 | * to account |
5519 | */ | 5521 | */ |
5520 | groups = idxblocks; | 5522 | groups = idxblocks; |
5521 | if (chunk) | 5523 | if (chunk) |
5522 | groups += 1; | 5524 | groups += 1; |
5523 | else | 5525 | else |
5524 | groups += nrblocks; | 5526 | groups += nrblocks; |
5525 | 5527 | ||
5526 | gdpblocks = groups; | 5528 | gdpblocks = groups; |
5527 | if (groups > ngroups) | 5529 | if (groups > ngroups) |
5528 | groups = ngroups; | 5530 | groups = ngroups; |
5529 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | 5531 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) |
5530 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | 5532 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; |
5531 | 5533 | ||
5532 | /* bitmaps and block group descriptor blocks */ | 5534 | /* bitmaps and block group descriptor blocks */ |
5533 | ret += groups + gdpblocks; | 5535 | ret += groups + gdpblocks; |
5534 | 5536 | ||
5535 | /* Blocks for super block, inode, quota and xattr blocks */ | 5537 | /* Blocks for super block, inode, quota and xattr blocks */ |
5536 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | 5538 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); |
5537 | 5539 | ||
5538 | return ret; | 5540 | return ret; |
5539 | } | 5541 | } |
5540 | 5542 | ||
/*
 * Calculate the total number of credits to reserve so that the
 * modification of a single page fits into one transaction, which may
 * include multiple chunks of block allocations.
 *
 * This could be called via ext4_write_begin().
 *
 * We need to consider the worst case: one new block per extent.
 */
int ext4_writepage_trans_blocks(struct inode *inode)
{
	int bpp = ext4_journal_blocks_per_page(inode);
	int credits = ext4_meta_trans_blocks(inode, bpp, 0);

	/* Journalled data mode also journals the data blocks themselves. */
	if (ext4_should_journal_data(inode))
		credits += bpp;

	return credits;
}
5563 | 5565 | ||
/*
 * Calculate the journal credits for a chunk of data modification.
 *
 * This is called from DIO, fallocate or whoever is calling
 * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
 *
 * Journal buffers for data blocks are not included here, as DIO and
 * fallocate do not need to journal data buffers.
 */
int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
{
	/* A chunk is contiguous by definition, hence chunk == 1. */
	int credits = ext4_meta_trans_blocks(inode, nrblocks, 1);

	return credits;
}
5577 | 5579 | ||
/*
 * The caller must have previously called ext4_reserve_inode_write().
 * Given this, we know that the caller already has write access to iloc->bh.
 */
int ext4_mark_iloc_dirty(handle_t *handle,
			 struct inode *inode, struct ext4_iloc *iloc)
{
	int err = 0;

	/* Bump i_version on every change when the i_version mount option is set. */
	if (test_opt(inode->i_sb, I_VERSION))
		inode_inc_iversion(inode);

	/* the do_update_inode consumes one bh->b_count */
	get_bh(iloc->bh);

	/* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
	err = ext4_do_update_inode(handle, inode, iloc);
	/* Drop the caller's reference; the one taken above was consumed. */
	put_bh(iloc->bh);
	return err;
}
5598 | 5600 | ||
5599 | /* | 5601 | /* |
5600 | * On success, We end up with an outstanding reference count against | 5602 | * On success, We end up with an outstanding reference count against |
5601 | * iloc->bh. This _must_ be cleaned up later. | 5603 | * iloc->bh. This _must_ be cleaned up later. |
5602 | */ | 5604 | */ |
5603 | 5605 | ||
5604 | int | 5606 | int |
5605 | ext4_reserve_inode_write(handle_t *handle, struct inode *inode, | 5607 | ext4_reserve_inode_write(handle_t *handle, struct inode *inode, |
5606 | struct ext4_iloc *iloc) | 5608 | struct ext4_iloc *iloc) |
5607 | { | 5609 | { |
5608 | int err; | 5610 | int err; |
5609 | 5611 | ||
5610 | err = ext4_get_inode_loc(inode, iloc); | 5612 | err = ext4_get_inode_loc(inode, iloc); |
5611 | if (!err) { | 5613 | if (!err) { |
5612 | BUFFER_TRACE(iloc->bh, "get_write_access"); | 5614 | BUFFER_TRACE(iloc->bh, "get_write_access"); |
5613 | err = ext4_journal_get_write_access(handle, iloc->bh); | 5615 | err = ext4_journal_get_write_access(handle, iloc->bh); |
5614 | if (err) { | 5616 | if (err) { |
5615 | brelse(iloc->bh); | 5617 | brelse(iloc->bh); |
5616 | iloc->bh = NULL; | 5618 | iloc->bh = NULL; |
5617 | } | 5619 | } |
5618 | } | 5620 | } |
5619 | ext4_std_error(inode->i_sb, err); | 5621 | ext4_std_error(inode->i_sb, err); |
5620 | return err; | 5622 | return err; |
5621 | } | 5623 | } |
5622 | 5624 | ||
5623 | /* | 5625 | /* |
5624 | * Expand an inode by new_extra_isize bytes. | 5626 | * Expand an inode by new_extra_isize bytes. |
5625 | * Returns 0 on success or negative error number on failure. | 5627 | * Returns 0 on success or negative error number on failure. |
5626 | */ | 5628 | */ |
5627 | static int ext4_expand_extra_isize(struct inode *inode, | 5629 | static int ext4_expand_extra_isize(struct inode *inode, |
5628 | unsigned int new_extra_isize, | 5630 | unsigned int new_extra_isize, |
5629 | struct ext4_iloc iloc, | 5631 | struct ext4_iloc iloc, |
5630 | handle_t *handle) | 5632 | handle_t *handle) |
5631 | { | 5633 | { |
5632 | struct ext4_inode *raw_inode; | 5634 | struct ext4_inode *raw_inode; |
5633 | struct ext4_xattr_ibody_header *header; | 5635 | struct ext4_xattr_ibody_header *header; |
5634 | 5636 | ||
5635 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) | 5637 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
5636 | return 0; | 5638 | return 0; |
5637 | 5639 | ||
5638 | raw_inode = ext4_raw_inode(&iloc); | 5640 | raw_inode = ext4_raw_inode(&iloc); |
5639 | 5641 | ||
5640 | header = IHDR(inode, raw_inode); | 5642 | header = IHDR(inode, raw_inode); |
5641 | 5643 | ||
5642 | /* No extended attributes present */ | 5644 | /* No extended attributes present */ |
5643 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 5645 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
5644 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 5646 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { |
5645 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 5647 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, |
5646 | new_extra_isize); | 5648 | new_extra_isize); |
5647 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 5649 | EXT4_I(inode)->i_extra_isize = new_extra_isize; |
5648 | return 0; | 5650 | return 0; |
5649 | } | 5651 | } |
5650 | 5652 | ||
5651 | /* try to expand with EAs present */ | 5653 | /* try to expand with EAs present */ |
5652 | return ext4_expand_extra_isize_ea(inode, new_extra_isize, | 5654 | return ext4_expand_extra_isize_ea(inode, new_extra_isize, |
5653 | raw_inode, handle); | 5655 | raw_inode, handle); |
5654 | } | 5656 | } |
5655 | 5657 | ||
5656 | /* | 5658 | /* |
5657 | * What we do here is to mark the in-core inode as clean with respect to inode | 5659 | * What we do here is to mark the in-core inode as clean with respect to inode |
5658 | * dirtiness (it may still be data-dirty). | 5660 | * dirtiness (it may still be data-dirty). |
5659 | * This means that the in-core inode may be reaped by prune_icache | 5661 | * This means that the in-core inode may be reaped by prune_icache |
5660 | * without having to perform any I/O. This is a very good thing, | 5662 | * without having to perform any I/O. This is a very good thing, |
5661 | * because *any* task may call prune_icache - even ones which | 5663 | * because *any* task may call prune_icache - even ones which |
5662 | * have a transaction open against a different journal. | 5664 | * have a transaction open against a different journal. |
5663 | * | 5665 | * |
5664 | * Is this cheating? Not really. Sure, we haven't written the | 5666 | * Is this cheating? Not really. Sure, we haven't written the |
5665 | * inode out, but prune_icache isn't a user-visible syncing function. | 5667 | * inode out, but prune_icache isn't a user-visible syncing function. |
5666 | * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) | 5668 | * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) |
5667 | * we start and wait on commits. | 5669 | * we start and wait on commits. |
5668 | * | 5670 | * |
5669 | * Is this efficient/effective? Well, we're being nice to the system | 5671 | * Is this efficient/effective? Well, we're being nice to the system |
5670 | * by cleaning up our inodes proactively so they can be reaped | 5672 | * by cleaning up our inodes proactively so they can be reaped |
5671 | * without I/O. But we are potentially leaving up to five seconds' | 5673 | * without I/O. But we are potentially leaving up to five seconds' |
5672 | * worth of inodes floating about which prune_icache wants us to | 5674 | * worth of inodes floating about which prune_icache wants us to |
5673 | * write out. One way to fix that would be to get prune_icache() | 5675 | * write out. One way to fix that would be to get prune_icache() |
5674 | * to do a write_super() to free up some memory. It has the desired | 5676 | * to do a write_super() to free up some memory. It has the desired |
5675 | * effect. | 5677 | * effect. |
5676 | */ | 5678 | */ |
5677 | int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | 5679 | int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) |
5678 | { | 5680 | { |
5679 | struct ext4_iloc iloc; | 5681 | struct ext4_iloc iloc; |
5680 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5682 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5681 | static unsigned int mnt_count; | 5683 | static unsigned int mnt_count; |
5682 | int err, ret; | 5684 | int err, ret; |
5683 | 5685 | ||
5684 | might_sleep(); | 5686 | might_sleep(); |
5685 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | 5687 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); |
5686 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5688 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5687 | if (ext4_handle_valid(handle) && | 5689 | if (ext4_handle_valid(handle) && |
5688 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5690 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
5689 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { | 5691 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { |
5690 | /* | 5692 | /* |
5691 | * We need extra buffer credits since we may write into EA block | 5693 | * We need extra buffer credits since we may write into EA block |
5692 | * with this same handle. If journal_extend fails, then it will | 5694 | * with this same handle. If journal_extend fails, then it will |
5693 | * only result in a minor loss of functionality for that inode. | 5695 | * only result in a minor loss of functionality for that inode. |
5694 | * If this is felt to be critical, then e2fsck should be run to | 5696 | * If this is felt to be critical, then e2fsck should be run to |
5695 | * force a large enough s_min_extra_isize. | 5697 | * force a large enough s_min_extra_isize. |
5696 | */ | 5698 | */ |
5697 | if ((jbd2_journal_extend(handle, | 5699 | if ((jbd2_journal_extend(handle, |
5698 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { | 5700 | EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { |
5699 | ret = ext4_expand_extra_isize(inode, | 5701 | ret = ext4_expand_extra_isize(inode, |
5700 | sbi->s_want_extra_isize, | 5702 | sbi->s_want_extra_isize, |
5701 | iloc, handle); | 5703 | iloc, handle); |
5702 | if (ret) { | 5704 | if (ret) { |
5703 | ext4_set_inode_state(inode, | 5705 | ext4_set_inode_state(inode, |
5704 | EXT4_STATE_NO_EXPAND); | 5706 | EXT4_STATE_NO_EXPAND); |
5705 | if (mnt_count != | 5707 | if (mnt_count != |
5706 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 5708 | le16_to_cpu(sbi->s_es->s_mnt_count)) { |
5707 | ext4_warning(inode->i_sb, | 5709 | ext4_warning(inode->i_sb, |
5708 | "Unable to expand inode %lu. Delete" | 5710 | "Unable to expand inode %lu. Delete" |
5709 | " some EAs or run e2fsck.", | 5711 | " some EAs or run e2fsck.", |
5710 | inode->i_ino); | 5712 | inode->i_ino); |
5711 | mnt_count = | 5713 | mnt_count = |
5712 | le16_to_cpu(sbi->s_es->s_mnt_count); | 5714 | le16_to_cpu(sbi->s_es->s_mnt_count); |
5713 | } | 5715 | } |
5714 | } | 5716 | } |
5715 | } | 5717 | } |
5716 | } | 5718 | } |
5717 | if (!err) | 5719 | if (!err) |
5718 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | 5720 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); |
5719 | return err; | 5721 | return err; |
5720 | } | 5722 | } |
5721 | 5723 | ||
5722 | /* | 5724 | /* |
5723 | * ext4_dirty_inode() is called from __mark_inode_dirty() | 5725 | * ext4_dirty_inode() is called from __mark_inode_dirty() |
5724 | * | 5726 | * |
5725 | * We're really interested in the case where a file is being extended. | 5727 | * We're really interested in the case where a file is being extended. |
5726 | * i_size has been changed by generic_commit_write() and we thus need | 5728 | * i_size has been changed by generic_commit_write() and we thus need |
5727 | * to include the updated inode in the current transaction. | 5729 | * to include the updated inode in the current transaction. |
5728 | * | 5730 | * |
5729 | * Also, dquot_alloc_block() will always dirty the inode when blocks | 5731 | * Also, dquot_alloc_block() will always dirty the inode when blocks |
5730 | * are allocated to the file. | 5732 | * are allocated to the file. |
5731 | * | 5733 | * |
5732 | * If the inode is marked synchronous, we don't honour that here - doing | 5734 | * If the inode is marked synchronous, we don't honour that here - doing |
5733 | * so would cause a commit on atime updates, which we don't bother doing. | 5735 | * so would cause a commit on atime updates, which we don't bother doing. |
5734 | * We handle synchronous inodes at the highest possible level. | 5736 | * We handle synchronous inodes at the highest possible level. |
5735 | */ | 5737 | */ |
5736 | void ext4_dirty_inode(struct inode *inode, int flags) | 5738 | void ext4_dirty_inode(struct inode *inode, int flags) |
5737 | { | 5739 | { |
5738 | handle_t *handle; | 5740 | handle_t *handle; |
5739 | 5741 | ||
5740 | handle = ext4_journal_start(inode, 2); | 5742 | handle = ext4_journal_start(inode, 2); |
5741 | if (IS_ERR(handle)) | 5743 | if (IS_ERR(handle)) |
5742 | goto out; | 5744 | goto out; |
5743 | 5745 | ||
5744 | ext4_mark_inode_dirty(handle, inode); | 5746 | ext4_mark_inode_dirty(handle, inode); |
5745 | 5747 | ||
5746 | ext4_journal_stop(handle); | 5748 | ext4_journal_stop(handle); |
5747 | out: | 5749 | out: |
5748 | return; | 5750 | return; |
5749 | } | 5751 | } |
5750 | 5752 | ||
5751 | #if 0 | 5753 | #if 0 |
5752 | /* | 5754 | /* |
5753 | * Bind an inode's backing buffer_head into this transaction, to prevent | 5755 | * Bind an inode's backing buffer_head into this transaction, to prevent |
5754 | * it from being flushed to disk early. Unlike | 5756 | * it from being flushed to disk early. Unlike |
5755 | * ext4_reserve_inode_write, this leaves behind no bh reference and | 5757 | * ext4_reserve_inode_write, this leaves behind no bh reference and |
5756 | * returns no iloc structure, so the caller needs to repeat the iloc | 5758 | * returns no iloc structure, so the caller needs to repeat the iloc |
5757 | * lookup to mark the inode dirty later. | 5759 | * lookup to mark the inode dirty later. |
5758 | */ | 5760 | */ |
5759 | static int ext4_pin_inode(handle_t *handle, struct inode *inode) | 5761 | static int ext4_pin_inode(handle_t *handle, struct inode *inode) |
5760 | { | 5762 | { |
5761 | struct ext4_iloc iloc; | 5763 | struct ext4_iloc iloc; |
5762 | 5764 | ||
5763 | int err = 0; | 5765 | int err = 0; |
5764 | if (handle) { | 5766 | if (handle) { |
5765 | err = ext4_get_inode_loc(inode, &iloc); | 5767 | err = ext4_get_inode_loc(inode, &iloc); |
5766 | if (!err) { | 5768 | if (!err) { |
5767 | BUFFER_TRACE(iloc.bh, "get_write_access"); | 5769 | BUFFER_TRACE(iloc.bh, "get_write_access"); |
5768 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 5770 | err = jbd2_journal_get_write_access(handle, iloc.bh); |
5769 | if (!err) | 5771 | if (!err) |
5770 | err = ext4_handle_dirty_metadata(handle, | 5772 | err = ext4_handle_dirty_metadata(handle, |
5771 | NULL, | 5773 | NULL, |
5772 | iloc.bh); | 5774 | iloc.bh); |
5773 | brelse(iloc.bh); | 5775 | brelse(iloc.bh); |
5774 | } | 5776 | } |
5775 | } | 5777 | } |
5776 | ext4_std_error(inode->i_sb, err); | 5778 | ext4_std_error(inode->i_sb, err); |
5777 | return err; | 5779 | return err; |
5778 | } | 5780 | } |
5779 | #endif | 5781 | #endif |
5780 | 5782 | ||
5781 | int ext4_change_inode_journal_flag(struct inode *inode, int val) | 5783 | int ext4_change_inode_journal_flag(struct inode *inode, int val) |
5782 | { | 5784 | { |
5783 | journal_t *journal; | 5785 | journal_t *journal; |
5784 | handle_t *handle; | 5786 | handle_t *handle; |
5785 | int err; | 5787 | int err; |
5786 | 5788 | ||
5787 | /* | 5789 | /* |
5788 | * We have to be very careful here: changing a data block's | 5790 | * We have to be very careful here: changing a data block's |
5789 | * journaling status dynamically is dangerous. If we write a | 5791 | * journaling status dynamically is dangerous. If we write a |
5790 | * data block to the journal, change the status and then delete | 5792 | * data block to the journal, change the status and then delete |
5791 | * that block, we risk forgetting to revoke the old log record | 5793 | * that block, we risk forgetting to revoke the old log record |
5792 | * from the journal and so a subsequent replay can corrupt data. | 5794 | * from the journal and so a subsequent replay can corrupt data. |
5793 | * So, first we make sure that the journal is empty and that | 5795 | * So, first we make sure that the journal is empty and that |
5794 | * nobody is changing anything. | 5796 | * nobody is changing anything. |
5795 | */ | 5797 | */ |
5796 | 5798 | ||
5797 | journal = EXT4_JOURNAL(inode); | 5799 | journal = EXT4_JOURNAL(inode); |
5798 | if (!journal) | 5800 | if (!journal) |
5799 | return 0; | 5801 | return 0; |
5800 | if (is_journal_aborted(journal)) | 5802 | if (is_journal_aborted(journal)) |
5801 | return -EROFS; | 5803 | return -EROFS; |
5802 | 5804 | ||
5803 | jbd2_journal_lock_updates(journal); | 5805 | jbd2_journal_lock_updates(journal); |
5804 | jbd2_journal_flush(journal); | 5806 | jbd2_journal_flush(journal); |
5805 | 5807 | ||
5806 | /* | 5808 | /* |
5807 | * OK, there are no updates running now, and all cached data is | 5809 | * OK, there are no updates running now, and all cached data is |
5808 | * synced to disk. We are now in a completely consistent state | 5810 | * synced to disk. We are now in a completely consistent state |
5809 | * which doesn't have anything in the journal, and we know that | 5811 | * which doesn't have anything in the journal, and we know that |
5810 | * no filesystem updates are running, so it is safe to modify | 5812 | * no filesystem updates are running, so it is safe to modify |
5811 | * the inode's in-core data-journaling state flag now. | 5813 | * the inode's in-core data-journaling state flag now. |
5812 | */ | 5814 | */ |
5813 | 5815 | ||
5814 | if (val) | 5816 | if (val) |
5815 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 5817 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5816 | else | 5818 | else |
5817 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 5819 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5818 | ext4_set_aops(inode); | 5820 | ext4_set_aops(inode); |
5819 | 5821 | ||
5820 | jbd2_journal_unlock_updates(journal); | 5822 | jbd2_journal_unlock_updates(journal); |
5821 | 5823 | ||
5822 | /* Finally we can mark the inode as dirty. */ | 5824 | /* Finally we can mark the inode as dirty. */ |
5823 | 5825 | ||
5824 | handle = ext4_journal_start(inode, 1); | 5826 | handle = ext4_journal_start(inode, 1); |
5825 | if (IS_ERR(handle)) | 5827 | if (IS_ERR(handle)) |
5826 | return PTR_ERR(handle); | 5828 | return PTR_ERR(handle); |
5827 | 5829 | ||
5828 | err = ext4_mark_inode_dirty(handle, inode); | 5830 | err = ext4_mark_inode_dirty(handle, inode); |
5829 | ext4_handle_sync(handle); | 5831 | ext4_handle_sync(handle); |
5830 | ext4_journal_stop(handle); | 5832 | ext4_journal_stop(handle); |
5831 | ext4_std_error(inode->i_sb, err); | 5833 | ext4_std_error(inode->i_sb, err); |
5832 | 5834 | ||
5833 | return err; | 5835 | return err; |
5834 | } | 5836 | } |
5835 | 5837 | ||
5836 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | 5838 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) |
5837 | { | 5839 | { |
5838 | return !buffer_mapped(bh); | 5840 | return !buffer_mapped(bh); |
5839 | } | 5841 | } |
5840 | 5842 | ||
5841 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 5843 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
5842 | { | 5844 | { |
5843 | struct page *page = vmf->page; | 5845 | struct page *page = vmf->page; |
5844 | loff_t size; | 5846 | loff_t size; |
5845 | unsigned long len; | 5847 | unsigned long len; |
5846 | int ret; | 5848 | int ret; |
5847 | struct file *file = vma->vm_file; | 5849 | struct file *file = vma->vm_file; |
5848 | struct inode *inode = file->f_path.dentry->d_inode; | 5850 | struct inode *inode = file->f_path.dentry->d_inode; |
5849 | struct address_space *mapping = inode->i_mapping; | 5851 | struct address_space *mapping = inode->i_mapping; |
5850 | handle_t *handle; | 5852 | handle_t *handle; |
5851 | get_block_t *get_block; | 5853 | get_block_t *get_block; |
5852 | int retries = 0; | 5854 | int retries = 0; |
5853 | 5855 | ||
5854 | /* | 5856 | /* |
5855 | * This check is racy but catches the common case. We rely on | 5857 | * This check is racy but catches the common case. We rely on |
5856 | * __block_page_mkwrite() to do a reliable check. | 5858 | * __block_page_mkwrite() to do a reliable check. |
5857 | */ | 5859 | */ |
5858 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 5860 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
5859 | /* Delalloc case is easy... */ | 5861 | /* Delalloc case is easy... */ |
5860 | if (test_opt(inode->i_sb, DELALLOC) && | 5862 | if (test_opt(inode->i_sb, DELALLOC) && |
5861 | !ext4_should_journal_data(inode) && | 5863 | !ext4_should_journal_data(inode) && |
5862 | !ext4_nonda_switch(inode->i_sb)) { | 5864 | !ext4_nonda_switch(inode->i_sb)) { |
5863 | do { | 5865 | do { |
5864 | ret = __block_page_mkwrite(vma, vmf, | 5866 | ret = __block_page_mkwrite(vma, vmf, |
5865 | ext4_da_get_block_prep); | 5867 | ext4_da_get_block_prep); |
5866 | } while (ret == -ENOSPC && | 5868 | } while (ret == -ENOSPC && |
5867 | ext4_should_retry_alloc(inode->i_sb, &retries)); | 5869 | ext4_should_retry_alloc(inode->i_sb, &retries)); |
5868 | goto out_ret; | 5870 | goto out_ret; |
5869 | } | 5871 | } |
5870 | 5872 | ||
5871 | lock_page(page); | 5873 | lock_page(page); |
5872 | size = i_size_read(inode); | 5874 | size = i_size_read(inode); |
5873 | /* Page got truncated from under us? */ | 5875 | /* Page got truncated from under us? */ |
5874 | if (page->mapping != mapping || page_offset(page) > size) { | 5876 | if (page->mapping != mapping || page_offset(page) > size) { |
5875 | unlock_page(page); | 5877 | unlock_page(page); |
5876 | ret = VM_FAULT_NOPAGE; | 5878 | ret = VM_FAULT_NOPAGE; |
5877 | goto out; | 5879 | goto out; |
5878 | } | 5880 | } |
5879 | 5881 | ||
5880 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5882 | if (page->index == size >> PAGE_CACHE_SHIFT) |
5881 | len = size & ~PAGE_CACHE_MASK; | 5883 | len = size & ~PAGE_CACHE_MASK; |
5882 | else | 5884 | else |
5883 | len = PAGE_CACHE_SIZE; | 5885 | len = PAGE_CACHE_SIZE; |
5884 | /* | 5886 | /* |
5885 | * Return if we have all the buffers mapped. This avoids the need to do | 5887 | * Return if we have all the buffers mapped. This avoids the need to do |
5886 | * journal_start/journal_stop which can block and take a long time | 5888 | * journal_start/journal_stop which can block and take a long time |
5887 | */ | 5889 | */ |
5888 | if (page_has_buffers(page)) { | 5890 | if (page_has_buffers(page)) { |
5889 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5891 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5890 | ext4_bh_unmapped)) { | 5892 | ext4_bh_unmapped)) { |
5891 | /* Wait so that we don't change page under IO */ | 5893 | /* Wait so that we don't change page under IO */ |
5892 | wait_on_page_writeback(page); | 5894 | wait_on_page_writeback(page); |
5893 | ret = VM_FAULT_LOCKED; | 5895 | ret = VM_FAULT_LOCKED; |
5894 | goto out; | 5896 | goto out; |
5895 | } | 5897 | } |
5896 | } | 5898 | } |
5897 | unlock_page(page); | 5899 | unlock_page(page); |
5898 | /* OK, we need to fill the hole... */ | 5900 | /* OK, we need to fill the hole... */ |
5899 | if (ext4_should_dioread_nolock(inode)) | 5901 | if (ext4_should_dioread_nolock(inode)) |
5900 | get_block = ext4_get_block_write; | 5902 | get_block = ext4_get_block_write; |
5901 | else | 5903 | else |
5902 | get_block = ext4_get_block; | 5904 | get_block = ext4_get_block; |
5903 | retry_alloc: | 5905 | retry_alloc: |
5904 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 5906 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
5905 | if (IS_ERR(handle)) { | 5907 | if (IS_ERR(handle)) { |
5906 | ret = VM_FAULT_SIGBUS; | 5908 | ret = VM_FAULT_SIGBUS; |
5907 | goto out; | 5909 | goto out; |
5908 | } | 5910 | } |
5909 | ret = __block_page_mkwrite(vma, vmf, get_block); | 5911 | ret = __block_page_mkwrite(vma, vmf, get_block); |
5910 | if (!ret && ext4_should_journal_data(inode)) { | 5912 | if (!ret && ext4_should_journal_data(inode)) { |
5911 | if (walk_page_buffers(handle, page_buffers(page), 0, | 5913 | if (walk_page_buffers(handle, page_buffers(page), 0, |
5912 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 5914 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
5913 | unlock_page(page); | 5915 | unlock_page(page); |
5914 | ret = VM_FAULT_SIGBUS; | 5916 | ret = VM_FAULT_SIGBUS; |
5915 | goto out; | 5917 | goto out; |
5916 | } | 5918 | } |
5917 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 5919 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
5918 | } | 5920 | } |
5919 | ext4_journal_stop(handle); | 5921 | ext4_journal_stop(handle); |
5920 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 5922 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
5921 | goto retry_alloc; | 5923 | goto retry_alloc; |
5922 | out_ret: | 5924 | out_ret: |
5923 | ret = block_page_mkwrite_return(ret); | 5925 | ret = block_page_mkwrite_return(ret); |
5924 | out: | 5926 | out: |
5925 | return ret; | 5927 | return ret; |
5926 | } | 5928 | } |
5927 | 5929 |
fs/fat/file.c
1 | /* | 1 | /* |
2 | * linux/fs/fat/file.c | 2 | * linux/fs/fat/file.c |
3 | * | 3 | * |
4 | * Written 1992,1993 by Werner Almesberger | 4 | * Written 1992,1993 by Werner Almesberger |
5 | * | 5 | * |
6 | * regular file handling primitives for fat-based filesystems | 6 | * regular file handling primitives for fat-based filesystems |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/compat.h> | 11 | #include <linux/compat.h> |
12 | #include <linux/mount.h> | 12 | #include <linux/mount.h> |
13 | #include <linux/time.h> | 13 | #include <linux/time.h> |
14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
15 | #include <linux/writeback.h> | 15 | #include <linux/writeback.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/fsnotify.h> | 18 | #include <linux/fsnotify.h> |
19 | #include <linux/security.h> | 19 | #include <linux/security.h> |
20 | #include "fat.h" | 20 | #include "fat.h" |
21 | 21 | ||
22 | static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) | 22 | static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) |
23 | { | 23 | { |
24 | u32 attr; | 24 | u32 attr; |
25 | 25 | ||
26 | mutex_lock(&inode->i_mutex); | 26 | mutex_lock(&inode->i_mutex); |
27 | attr = fat_make_attrs(inode); | 27 | attr = fat_make_attrs(inode); |
28 | mutex_unlock(&inode->i_mutex); | 28 | mutex_unlock(&inode->i_mutex); |
29 | 29 | ||
30 | return put_user(attr, user_attr); | 30 | return put_user(attr, user_attr); |
31 | } | 31 | } |
32 | 32 | ||
33 | static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) | 33 | static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) |
34 | { | 34 | { |
35 | struct inode *inode = file->f_path.dentry->d_inode; | 35 | struct inode *inode = file->f_path.dentry->d_inode; |
36 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 36 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); |
37 | int is_dir = S_ISDIR(inode->i_mode); | 37 | int is_dir = S_ISDIR(inode->i_mode); |
38 | u32 attr, oldattr; | 38 | u32 attr, oldattr; |
39 | struct iattr ia; | 39 | struct iattr ia; |
40 | int err; | 40 | int err; |
41 | 41 | ||
42 | err = get_user(attr, user_attr); | 42 | err = get_user(attr, user_attr); |
43 | if (err) | 43 | if (err) |
44 | goto out; | 44 | goto out; |
45 | 45 | ||
46 | mutex_lock(&inode->i_mutex); | 46 | mutex_lock(&inode->i_mutex); |
47 | err = mnt_want_write(file->f_path.mnt); | 47 | err = mnt_want_write(file->f_path.mnt); |
48 | if (err) | 48 | if (err) |
49 | goto out_unlock_inode; | 49 | goto out_unlock_inode; |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * ATTR_VOLUME and ATTR_DIR cannot be changed; this also | 52 | * ATTR_VOLUME and ATTR_DIR cannot be changed; this also |
53 | * prevents the user from turning us into a VFAT | 53 | * prevents the user from turning us into a VFAT |
54 | * longname entry. Also, we obviously can't set | 54 | * longname entry. Also, we obviously can't set |
55 | * any of the NTFS attributes in the high 24 bits. | 55 | * any of the NTFS attributes in the high 24 bits. |
56 | */ | 56 | */ |
57 | attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); | 57 | attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR); |
58 | /* Merge in ATTR_VOLUME and ATTR_DIR */ | 58 | /* Merge in ATTR_VOLUME and ATTR_DIR */ |
59 | attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | | 59 | attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | |
60 | (is_dir ? ATTR_DIR : 0); | 60 | (is_dir ? ATTR_DIR : 0); |
61 | oldattr = fat_make_attrs(inode); | 61 | oldattr = fat_make_attrs(inode); |
62 | 62 | ||
63 | /* Equivalent to a chmod() */ | 63 | /* Equivalent to a chmod() */ |
64 | ia.ia_valid = ATTR_MODE | ATTR_CTIME; | 64 | ia.ia_valid = ATTR_MODE | ATTR_CTIME; |
65 | ia.ia_ctime = current_fs_time(inode->i_sb); | 65 | ia.ia_ctime = current_fs_time(inode->i_sb); |
66 | if (is_dir) | 66 | if (is_dir) |
67 | ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); | 67 | ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); |
68 | else { | 68 | else { |
69 | ia.ia_mode = fat_make_mode(sbi, attr, | 69 | ia.ia_mode = fat_make_mode(sbi, attr, |
70 | S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); | 70 | S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); |
71 | } | 71 | } |
72 | 72 | ||
73 | /* The root directory has no attributes */ | 73 | /* The root directory has no attributes */ |
74 | if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { | 74 | if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { |
75 | err = -EINVAL; | 75 | err = -EINVAL; |
76 | goto out_drop_write; | 76 | goto out_drop_write; |
77 | } | 77 | } |
78 | 78 | ||
79 | if (sbi->options.sys_immutable && | 79 | if (sbi->options.sys_immutable && |
80 | ((attr | oldattr) & ATTR_SYS) && | 80 | ((attr | oldattr) & ATTR_SYS) && |
81 | !capable(CAP_LINUX_IMMUTABLE)) { | 81 | !capable(CAP_LINUX_IMMUTABLE)) { |
82 | err = -EPERM; | 82 | err = -EPERM; |
83 | goto out_drop_write; | 83 | goto out_drop_write; |
84 | } | 84 | } |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * The security check is questionable... We single | 87 | * The security check is questionable... We single |
88 | * out the RO attribute for checking by the security | 88 | * out the RO attribute for checking by the security |
89 | * module, just because it maps to a file mode. | 89 | * module, just because it maps to a file mode. |
90 | */ | 90 | */ |
91 | err = security_inode_setattr(file->f_path.dentry, &ia); | 91 | err = security_inode_setattr(file->f_path.dentry, &ia); |
92 | if (err) | 92 | if (err) |
93 | goto out_drop_write; | 93 | goto out_drop_write; |
94 | 94 | ||
95 | /* This MUST be done before doing anything irreversible... */ | 95 | /* This MUST be done before doing anything irreversible... */ |
96 | err = fat_setattr(file->f_path.dentry, &ia); | 96 | err = fat_setattr(file->f_path.dentry, &ia); |
97 | if (err) | 97 | if (err) |
98 | goto out_drop_write; | 98 | goto out_drop_write; |
99 | 99 | ||
100 | fsnotify_change(file->f_path.dentry, ia.ia_valid); | 100 | fsnotify_change(file->f_path.dentry, ia.ia_valid); |
101 | if (sbi->options.sys_immutable) { | 101 | if (sbi->options.sys_immutable) { |
102 | if (attr & ATTR_SYS) | 102 | if (attr & ATTR_SYS) |
103 | inode->i_flags |= S_IMMUTABLE; | 103 | inode->i_flags |= S_IMMUTABLE; |
104 | else | 104 | else |
105 | inode->i_flags &= ~S_IMMUTABLE; | 105 | inode->i_flags &= ~S_IMMUTABLE; |
106 | } | 106 | } |
107 | 107 | ||
108 | fat_save_attrs(inode, attr); | 108 | fat_save_attrs(inode, attr); |
109 | mark_inode_dirty(inode); | 109 | mark_inode_dirty(inode); |
110 | out_drop_write: | 110 | out_drop_write: |
111 | mnt_drop_write(file->f_path.mnt); | 111 | mnt_drop_write(file->f_path.mnt); |
112 | out_unlock_inode: | 112 | out_unlock_inode: |
113 | mutex_unlock(&inode->i_mutex); | 113 | mutex_unlock(&inode->i_mutex); |
114 | out: | 114 | out: |
115 | return err; | 115 | return err; |
116 | } | 116 | } |
117 | 117 | ||
118 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 118 | long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
119 | { | 119 | { |
120 | struct inode *inode = filp->f_path.dentry->d_inode; | 120 | struct inode *inode = filp->f_path.dentry->d_inode; |
121 | u32 __user *user_attr = (u32 __user *)arg; | 121 | u32 __user *user_attr = (u32 __user *)arg; |
122 | 122 | ||
123 | switch (cmd) { | 123 | switch (cmd) { |
124 | case FAT_IOCTL_GET_ATTRIBUTES: | 124 | case FAT_IOCTL_GET_ATTRIBUTES: |
125 | return fat_ioctl_get_attributes(inode, user_attr); | 125 | return fat_ioctl_get_attributes(inode, user_attr); |
126 | case FAT_IOCTL_SET_ATTRIBUTES: | 126 | case FAT_IOCTL_SET_ATTRIBUTES: |
127 | return fat_ioctl_set_attributes(filp, user_attr); | 127 | return fat_ioctl_set_attributes(filp, user_attr); |
128 | default: | 128 | default: |
129 | return -ENOTTY; /* Inappropriate ioctl for device */ | 129 | return -ENOTTY; /* Inappropriate ioctl for device */ |
130 | } | 130 | } |
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_COMPAT | 133 | #ifdef CONFIG_COMPAT |
134 | static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd, | 134 | static long fat_generic_compat_ioctl(struct file *filp, unsigned int cmd, |
135 | unsigned long arg) | 135 | unsigned long arg) |
136 | 136 | ||
137 | { | 137 | { |
138 | return fat_generic_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); | 138 | return fat_generic_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); |
139 | } | 139 | } |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | static int fat_file_release(struct inode *inode, struct file *filp) | 142 | static int fat_file_release(struct inode *inode, struct file *filp) |
143 | { | 143 | { |
144 | if ((filp->f_mode & FMODE_WRITE) && | 144 | if ((filp->f_mode & FMODE_WRITE) && |
145 | MSDOS_SB(inode->i_sb)->options.flush) { | 145 | MSDOS_SB(inode->i_sb)->options.flush) { |
146 | fat_flush_inodes(inode->i_sb, inode, NULL); | 146 | fat_flush_inodes(inode->i_sb, inode, NULL); |
147 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 147 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
148 | } | 148 | } |
149 | return 0; | 149 | return 0; |
150 | } | 150 | } |
151 | 151 | ||
152 | int fat_file_fsync(struct file *filp, int datasync) | 152 | int fat_file_fsync(struct file *filp, int datasync) |
153 | { | 153 | { |
154 | struct inode *inode = filp->f_mapping->host; | 154 | struct inode *inode = filp->f_mapping->host; |
155 | int res, err; | 155 | int res, err; |
156 | 156 | ||
157 | res = generic_file_fsync(filp, datasync); | 157 | res = generic_file_fsync(filp, datasync); |
158 | err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); | 158 | err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); |
159 | 159 | ||
160 | return res ? res : err; | 160 | return res ? res : err; |
161 | } | 161 | } |
162 | 162 | ||
163 | 163 | ||
164 | const struct file_operations fat_file_operations = { | 164 | const struct file_operations fat_file_operations = { |
165 | .llseek = generic_file_llseek, | 165 | .llseek = generic_file_llseek, |
166 | .read = do_sync_read, | 166 | .read = do_sync_read, |
167 | .write = do_sync_write, | 167 | .write = do_sync_write, |
168 | .aio_read = generic_file_aio_read, | 168 | .aio_read = generic_file_aio_read, |
169 | .aio_write = generic_file_aio_write, | 169 | .aio_write = generic_file_aio_write, |
170 | .mmap = generic_file_mmap, | 170 | .mmap = generic_file_mmap, |
171 | .release = fat_file_release, | 171 | .release = fat_file_release, |
172 | .unlocked_ioctl = fat_generic_ioctl, | 172 | .unlocked_ioctl = fat_generic_ioctl, |
173 | #ifdef CONFIG_COMPAT | 173 | #ifdef CONFIG_COMPAT |
174 | .compat_ioctl = fat_generic_compat_ioctl, | 174 | .compat_ioctl = fat_generic_compat_ioctl, |
175 | #endif | 175 | #endif |
176 | .fsync = fat_file_fsync, | 176 | .fsync = fat_file_fsync, |
177 | .splice_read = generic_file_splice_read, | 177 | .splice_read = generic_file_splice_read, |
178 | }; | 178 | }; |
179 | 179 | ||
180 | static int fat_cont_expand(struct inode *inode, loff_t size) | 180 | static int fat_cont_expand(struct inode *inode, loff_t size) |
181 | { | 181 | { |
182 | struct address_space *mapping = inode->i_mapping; | 182 | struct address_space *mapping = inode->i_mapping; |
183 | loff_t start = inode->i_size, count = size - inode->i_size; | 183 | loff_t start = inode->i_size, count = size - inode->i_size; |
184 | int err; | 184 | int err; |
185 | 185 | ||
186 | err = generic_cont_expand_simple(inode, size); | 186 | err = generic_cont_expand_simple(inode, size); |
187 | if (err) | 187 | if (err) |
188 | goto out; | 188 | goto out; |
189 | 189 | ||
190 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; | 190 | inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; |
191 | mark_inode_dirty(inode); | 191 | mark_inode_dirty(inode); |
192 | if (IS_SYNC(inode)) { | 192 | if (IS_SYNC(inode)) { |
193 | int err2; | 193 | int err2; |
194 | 194 | ||
195 | /* | 195 | /* |
196 | * Opencode syncing since we don't have a file open to use | 196 | * Opencode syncing since we don't have a file open to use |
197 | * standard fsync path. | 197 | * standard fsync path. |
198 | */ | 198 | */ |
199 | err = filemap_fdatawrite_range(mapping, start, | 199 | err = filemap_fdatawrite_range(mapping, start, |
200 | start + count - 1); | 200 | start + count - 1); |
201 | err2 = sync_mapping_buffers(mapping); | 201 | err2 = sync_mapping_buffers(mapping); |
202 | if (!err) | 202 | if (!err) |
203 | err = err2; | 203 | err = err2; |
204 | err2 = write_inode_now(inode, 1); | 204 | err2 = write_inode_now(inode, 1); |
205 | if (!err) | 205 | if (!err) |
206 | err = err2; | 206 | err = err2; |
207 | if (!err) { | 207 | if (!err) { |
208 | err = filemap_fdatawait_range(mapping, start, | 208 | err = filemap_fdatawait_range(mapping, start, |
209 | start + count - 1); | 209 | start + count - 1); |
210 | } | 210 | } |
211 | } | 211 | } |
212 | out: | 212 | out: |
213 | return err; | 213 | return err; |
214 | } | 214 | } |
215 | 215 | ||
/*
 * Free all clusters after the skip'th cluster.
 *
 * @inode: inode whose cluster chain is being truncated
 * @skip:  number of leading clusters to keep (0 frees the whole chain)
 *
 * The on-disk metadata (start cluster / EOF marker) is written before the
 * clusters are released, so a crash in between leaks clusters rather than
 * corrupting the chain.  Returns 0 on success or a negative errno.
 */
static int fat_free(struct inode *inode, int skip)
{
	struct super_block *sb = inode->i_sb;
	int err, wait, free_start, i_start, i_logstart;

	/* Nothing allocated, nothing to free. */
	if (MSDOS_I(inode)->i_start == 0)
		return 0;

	fat_cache_inval_inode(inode);

	wait = IS_DIRSYNC(inode);
	/* Save the old chain head so we can roll back on sync failure. */
	i_start = free_start = MSDOS_I(inode)->i_start;
	i_logstart = MSDOS_I(inode)->i_logstart;

	/* First, we write the new file size. */
	if (!skip) {
		MSDOS_I(inode)->i_start = 0;
		MSDOS_I(inode)->i_logstart = 0;
	}
	MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
	if (wait) {
		err = fat_sync_inode(inode);
		if (err) {
			/* Restore the chain head; the file is unchanged. */
			MSDOS_I(inode)->i_start = i_start;
			MSDOS_I(inode)->i_logstart = i_logstart;
			return err;
		}
	} else
		mark_inode_dirty(inode);

	/* Write a new EOF, and get the remaining cluster chain for freeing. */
	if (skip) {
		struct fat_entry fatent;
		int ret, fclus, dclus;

		/* Locate the last cluster that stays allocated. */
		ret = fat_get_cluster(inode, skip - 1, &fclus, &dclus);
		if (ret < 0)
			return ret;
		else if (ret == FAT_ENT_EOF)
			return 0;

		fatent_init(&fatent);
		ret = fat_ent_read(inode, &fatent, dclus);
		if (ret == FAT_ENT_EOF) {
			fatent_brelse(&fatent);
			return 0;
		} else if (ret == FAT_ENT_FREE) {
			/* Chain points at a free cluster: on-disk corruption. */
			fat_fs_error(sb,
				     "%s: invalid cluster chain (i_pos %lld)",
				     __func__, MSDOS_I(inode)->i_pos);
			ret = -EIO;
		} else if (ret > 0) {
			/* Terminate the kept part of the chain with EOF. */
			err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
			if (err)
				ret = err;
		}
		fatent_brelse(&fatent);
		if (ret < 0)
			return ret;

		/* ret is the first cluster of the tail to be released. */
		free_start = ret;
	}
	inode->i_blocks = skip << (MSDOS_SB(sb)->cluster_bits - 9);

	/* Freeing the remained cluster chain */
	return fat_free_clusters(inode, free_start);
}
285 | 285 | ||
286 | void fat_truncate_blocks(struct inode *inode, loff_t offset) | 286 | void fat_truncate_blocks(struct inode *inode, loff_t offset) |
287 | { | 287 | { |
288 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); | 288 | struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); |
289 | const unsigned int cluster_size = sbi->cluster_size; | 289 | const unsigned int cluster_size = sbi->cluster_size; |
290 | int nr_clusters; | 290 | int nr_clusters; |
291 | 291 | ||
292 | /* | 292 | /* |
293 | * This protects against truncating a file bigger than it was then | 293 | * This protects against truncating a file bigger than it was then |
294 | * trying to write into the hole. | 294 | * trying to write into the hole. |
295 | */ | 295 | */ |
296 | if (MSDOS_I(inode)->mmu_private > offset) | 296 | if (MSDOS_I(inode)->mmu_private > offset) |
297 | MSDOS_I(inode)->mmu_private = offset; | 297 | MSDOS_I(inode)->mmu_private = offset; |
298 | 298 | ||
299 | nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits; | 299 | nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits; |
300 | 300 | ||
301 | fat_free(inode, nr_clusters); | 301 | fat_free(inode, nr_clusters); |
302 | fat_flush_inodes(inode->i_sb, inode, NULL); | 302 | fat_flush_inodes(inode->i_sb, inode, NULL); |
303 | } | 303 | } |
304 | 304 | ||
305 | int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 305 | int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
306 | { | 306 | { |
307 | struct inode *inode = dentry->d_inode; | 307 | struct inode *inode = dentry->d_inode; |
308 | generic_fillattr(inode, stat); | 308 | generic_fillattr(inode, stat); |
309 | stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; | 309 | stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; |
310 | return 0; | 310 | return 0; |
311 | } | 311 | } |
312 | EXPORT_SYMBOL_GPL(fat_getattr); | 312 | EXPORT_SYMBOL_GPL(fat_getattr); |
313 | 313 | ||
/*
 * Validate and normalize a chmod request against what FAT can actually
 * store.  FAT has no per-file mode; permissions are synthesized from the
 * fmask/dmask mount options plus a single read-only attribute bit, so only
 * mode changes representable in that scheme are allowed.
 *
 * On success *mode_ptr is rewritten to the representable mode and 0 is
 * returned; -EPERM if the requested mode cannot be stored.
 */
static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
			     struct inode *inode, umode_t *mode_ptr)
{
	mode_t mask, perm;

	/*
	 * Note, the basic check is already done by a caller of
	 * (attr->ia_mode & ~FAT_VALID_MODE)
	 */

	if (S_ISREG(inode->i_mode))
		mask = sbi->options.fs_fmask;
	else
		mask = sbi->options.fs_dmask;

	/* Requested permission bits after applying the mount umask. */
	perm = *mode_ptr & ~(S_IFMT | mask);

	/*
	 * Of the r and x bits, all (subject to umask) must be present. Of the
	 * w bits, either all (subject to umask) or none must be present.
	 *
	 * If fat_mode_can_hold_ro(inode) is false, can't change w bits.
	 */
	if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
		return -EPERM;
	if (fat_mode_can_hold_ro(inode)) {
		/* w bits must be all-or-nothing relative to the umask. */
		if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
			return -EPERM;
	} else {
		/* Read-only attribute unavailable: w bits are fixed. */
		if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
			return -EPERM;
	}

	*mode_ptr &= S_IFMT | perm;

	return 0;
}
351 | 351 | ||
352 | static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) | 352 | static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) |
353 | { | 353 | { |
354 | mode_t allow_utime = sbi->options.allow_utime; | 354 | mode_t allow_utime = sbi->options.allow_utime; |
355 | 355 | ||
356 | if (current_fsuid() != inode->i_uid) { | 356 | if (current_fsuid() != inode->i_uid) { |
357 | if (in_group_p(inode->i_gid)) | 357 | if (in_group_p(inode->i_gid)) |
358 | allow_utime >>= 3; | 358 | allow_utime >>= 3; |
359 | if (allow_utime & MAY_WRITE) | 359 | if (allow_utime & MAY_WRITE) |
360 | return 1; | 360 | return 1; |
361 | } | 361 | } |
362 | 362 | ||
363 | /* use a default check */ | 363 | /* use a default check */ |
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
/* iattr flags that mean timestamps are being set explicitly (utimes(2)). */
#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
/* valid file mode bits */
#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
370 | 370 | ||
/*
 * ->setattr for FAT.  Handles size changes (expand via fat_cont_expand(),
 * shrink via fat_truncate_blocks()), and restricts uid/gid/mode changes to
 * what the mount options can represent.  With the "quiet" option, refused
 * changes are silently ignored instead of returning -EPERM.
 */
int fat_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
	struct inode *inode = dentry->d_inode;
	unsigned int ia_valid;
	int error;

	/* Check for setting the inode time. */
	ia_valid = attr->ia_valid;
	if (ia_valid & TIMES_SET_FLAGS) {
		if (fat_allow_set_time(sbi, inode))
			attr->ia_valid &= ~TIMES_SET_FLAGS;
	}

	error = inode_change_ok(inode, attr);
	/* Restore the caller's flags; the clear above was only for the check. */
	attr->ia_valid = ia_valid;
	if (error) {
		if (sbi->options.quiet)
			error = 0;
		goto out;
	}

	/*
	 * Expand the file. Since inode_setattr() updates ->i_size
	 * before calling the ->truncate(), but FAT needs to fill the
	 * hole before it. XXX: this is no longer true with new truncate
	 * sequence.
	 */
	if (attr->ia_valid & ATTR_SIZE) {
		/* Wait for in-flight direct I/O before changing i_size. */
		inode_dio_wait(inode);

		if (attr->ia_size > inode->i_size) {
			error = fat_cont_expand(inode, attr->ia_size);
			if (error || attr->ia_valid == ATTR_SIZE)
				goto out;
			/* Size handled; don't truncate again below. */
			attr->ia_valid &= ~ATTR_SIZE;
		}
	}

	if (((attr->ia_valid & ATTR_UID) &&
	     (attr->ia_uid != sbi->options.fs_uid)) ||
	    ((attr->ia_valid & ATTR_GID) &&
	     (attr->ia_gid != sbi->options.fs_gid)) ||
	    ((attr->ia_valid & ATTR_MODE) &&
	     (attr->ia_mode & ~FAT_VALID_MODE)))
		error = -EPERM;

	if (error) {
		if (sbi->options.quiet)
			error = 0;
		goto out;
	}

	/*
	 * We don't return -EPERM here. Yes, strange, but this is too
	 * old behavior.
	 */
	if (attr->ia_valid & ATTR_MODE) {
		if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
			attr->ia_valid &= ~ATTR_MODE;
	}

	if (attr->ia_valid & ATTR_SIZE) {
		/* truncate_lock serializes against fat_get_block(). */
		down_write(&MSDOS_I(inode)->truncate_lock);
		truncate_setsize(inode, attr->ia_size);
		fat_truncate_blocks(inode, attr->ia_size);
		up_write(&MSDOS_I(inode)->truncate_lock);
	}

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
out:
	return error;
}
EXPORT_SYMBOL_GPL(fat_setattr);
444 | 446 | ||
445 | const struct inode_operations fat_file_inode_operations = { | 447 | const struct inode_operations fat_file_inode_operations = { |
446 | .setattr = fat_setattr, | 448 | .setattr = fat_setattr, |
447 | .getattr = fat_getattr, | 449 | .getattr = fat_getattr, |
448 | }; | 450 | }; |
449 | 451 |
fs/gfs2/bmap.c
1 | /* | 1 | /* |
2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. |
4 | * | 4 | * |
5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
11 | #include <linux/completion.h> | 11 | #include <linux/completion.h> |
12 | #include <linux/buffer_head.h> | 12 | #include <linux/buffer_head.h> |
13 | #include <linux/gfs2_ondisk.h> | 13 | #include <linux/gfs2_ondisk.h> |
14 | #include <linux/crc32.h> | 14 | #include <linux/crc32.h> |
15 | 15 | ||
16 | #include "gfs2.h" | 16 | #include "gfs2.h" |
17 | #include "incore.h" | 17 | #include "incore.h" |
18 | #include "bmap.h" | 18 | #include "bmap.h" |
19 | #include "glock.h" | 19 | #include "glock.h" |
20 | #include "inode.h" | 20 | #include "inode.h" |
21 | #include "meta_io.h" | 21 | #include "meta_io.h" |
22 | #include "quota.h" | 22 | #include "quota.h" |
23 | #include "rgrp.h" | 23 | #include "rgrp.h" |
24 | #include "super.h" | 24 | #include "super.h" |
25 | #include "trans.h" | 25 | #include "trans.h" |
26 | #include "dir.h" | 26 | #include "dir.h" |
27 | #include "util.h" | 27 | #include "util.h" |
28 | #include "trace_gfs2.h" | 28 | #include "trace_gfs2.h" |
29 | 29 | ||
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; /* bh per tree level */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];		 /* pointer index per level */
};

/* Callback invoked for each indirect-pointer range while walking the tree. */
typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
			     struct buffer_head *bh, __be64 *top,
			     __be64 *bottom, unsigned int height,
			     void *data);

/* State passed to the truncate ("strip") walk. */
struct strip_mine {
	int sm_first;		/* first pass flag */
	unsigned int sm_height;	/* height currently being stripped */
};
48 | 48 | ||
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* No usable page supplied: grab (and later release) page 0 ourselves. */
	if (!page || page->index) {
		page = grab_cache_page(inode->i_mapping, 0);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* Stuffed data cannot exceed what fits after the dinode header. */
		if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
			dsize = dibh->b_size - sizeof(struct gfs2_dinode);

		/* Copy the inline data in and zero the remainder of the page. */
		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits,
				     (1 << BH_Uptodate));

	bh = page_buffers(page);

	/* Point the page's buffer at the newly allocated block. */
	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	if (!gfs2_is_jdata(ip))
		mark_buffer_dirty(bh);
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

	if (release) {
		unlock_page(page);
		page_cache_release(page);
	}

	return 0;
}
109 | 109 | ||
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_block(ip, &block, &n);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directory data stays in the metadata journal. */
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			/* Regular file data moves out through the page cache. */
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First (and only) pointer in the dinode now refers to @block. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	/* The tree now has exactly one level of indirection. */
	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
179 | 179 | ||
180 | 180 | ||
181 | /** | 181 | /** |
182 | * find_metapath - Find path through the metadata tree | 182 | * find_metapath - Find path through the metadata tree |
183 | * @sdp: The superblock | 183 | * @sdp: The superblock |
184 | * @mp: The metapath to return the result in | 184 | * @mp: The metapath to return the result in |
185 | * @block: The disk block to look up | 185 | * @block: The disk block to look up |
186 | * @height: The pre-calculated height of the metadata tree | 186 | * @height: The pre-calculated height of the metadata tree |
187 | * | 187 | * |
188 | * This routine returns a struct metapath structure that defines a path | 188 | * This routine returns a struct metapath structure that defines a path |
189 | * through the metadata of inode "ip" to get to block "block". | 189 | * through the metadata of inode "ip" to get to block "block". |
190 | * | 190 | * |
191 | * Example: | 191 | * Example: |
192 | * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a | 192 | * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a |
193 | * filesystem with a blocksize of 4096. | 193 | * filesystem with a blocksize of 4096. |
194 | * | 194 | * |
195 | * find_metapath() would return a struct metapath structure set to: | 195 | * find_metapath() would return a struct metapath structure set to: |
196 | * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, | 196 | * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, |
197 | * and mp_list[2] = 165. | 197 | * and mp_list[2] = 165. |
198 | * | 198 | * |
199 | * That means that in order to get to the block containing the byte at | 199 | * That means that in order to get to the block containing the byte at |
200 | * offset 101342453, we would load the indirect block pointed to by pointer | 200 | * offset 101342453, we would load the indirect block pointed to by pointer |
201 | * 0 in the dinode. We would then load the indirect block pointed to by | 201 | * 0 in the dinode. We would then load the indirect block pointed to by |
202 | * pointer 48 in that indirect block. We would then load the data block | 202 | * pointer 48 in that indirect block. We would then load the data block |
203 | * pointed to by pointer 165 in that indirect block. | 203 | * pointed to by pointer 165 in that indirect block. |
204 | * | 204 | * |
205 | * ---------------------------------------- | 205 | * ---------------------------------------- |
206 | * | Dinode | | | 206 | * | Dinode | | |
207 | * | | 4| | 207 | * | | 4| |
208 | * | |0 1 2 3 4 5 9| | 208 | * | |0 1 2 3 4 5 9| |
209 | * | | 6| | 209 | * | | 6| |
210 | * ---------------------------------------- | 210 | * ---------------------------------------- |
211 | * | | 211 | * | |
212 | * | | 212 | * | |
213 | * V | 213 | * V |
214 | * ---------------------------------------- | 214 | * ---------------------------------------- |
215 | * | Indirect Block | | 215 | * | Indirect Block | |
216 | * | 5| | 216 | * | 5| |
217 | * | 4 4 4 4 4 5 5 1| | 217 | * | 4 4 4 4 4 5 5 1| |
218 | * |0 5 6 7 8 9 0 1 2| | 218 | * |0 5 6 7 8 9 0 1 2| |
219 | * ---------------------------------------- | 219 | * ---------------------------------------- |
220 | * | | 220 | * | |
221 | * | | 221 | * | |
222 | * V | 222 | * V |
223 | * ---------------------------------------- | 223 | * ---------------------------------------- |
224 | * | Indirect Block | | 224 | * | Indirect Block | |
225 | * | 1 1 1 1 1 5| | 225 | * | 1 1 1 1 1 5| |
226 | * | 6 6 6 6 6 1| | 226 | * | 6 6 6 6 6 1| |
227 | * |0 3 4 5 6 7 2| | 227 | * |0 3 4 5 6 7 2| |
228 | * ---------------------------------------- | 228 | * ---------------------------------------- |
229 | * | | 229 | * | |
230 | * | | 230 | * | |
231 | * V | 231 | * V |
232 | * ---------------------------------------- | 232 | * ---------------------------------------- |
233 | * | Data block containing offset | | 233 | * | Data block containing offset | |
234 | * | 101342453 | | 234 | * | 101342453 | |
235 | * | | | 235 | * | | |
236 | * | | | 236 | * | | |
237 | * ---------------------------------------- | 237 | * ---------------------------------------- |
238 | * | 238 | * |
239 | */ | 239 | */ |
240 | 240 | ||
241 | static void find_metapath(const struct gfs2_sbd *sdp, u64 block, | 241 | static void find_metapath(const struct gfs2_sbd *sdp, u64 block, |
242 | struct metapath *mp, unsigned int height) | 242 | struct metapath *mp, unsigned int height) |
243 | { | 243 | { |
244 | unsigned int i; | 244 | unsigned int i; |
245 | 245 | ||
246 | for (i = height; i--;) | 246 | for (i = height; i--;) |
247 | mp->mp_list[i] = do_div(block, sdp->sd_inptrs); | 247 | mp->mp_list[i] = do_div(block, sdp->sd_inptrs); |
248 | 248 | ||
249 | } | 249 | } |
250 | 250 | ||
251 | static inline unsigned int metapath_branch_start(const struct metapath *mp) | 251 | static inline unsigned int metapath_branch_start(const struct metapath *mp) |
252 | { | 252 | { |
253 | if (mp->mp_list[0] == 0) | 253 | if (mp->mp_list[0] == 0) |
254 | return 2; | 254 | return 2; |
255 | return 1; | 255 | return 1; |
256 | } | 256 | } |
257 | 257 | ||
258 | /** | 258 | /** |
259 | * metapointer - Return pointer to start of metadata in a buffer | 259 | * metapointer - Return pointer to start of metadata in a buffer |
260 | * @height: The metadata height (0 = dinode) | 260 | * @height: The metadata height (0 = dinode) |
261 | * @mp: The metapath | 261 | * @mp: The metapath |
262 | * | 262 | * |
263 | * Return a pointer to the block number of the next height of the metadata | 263 | * Return a pointer to the block number of the next height of the metadata |
264 | * tree given a buffer containing the pointer to the current height of the | 264 | * tree given a buffer containing the pointer to the current height of the |
265 | * metadata tree. | 265 | * metadata tree. |
266 | */ | 266 | */ |
267 | 267 | ||
268 | static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) | 268 | static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) |
269 | { | 269 | { |
270 | struct buffer_head *bh = mp->mp_bh[height]; | 270 | struct buffer_head *bh = mp->mp_bh[height]; |
271 | unsigned int head_size = (height > 0) ? | 271 | unsigned int head_size = (height > 0) ? |
272 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); | 272 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); |
273 | return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; | 273 | return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; |
274 | } | 274 | } |
275 | 275 | ||
276 | /** | 276 | /** |
277 | * lookup_metapath - Walk the metadata tree to a specific point | 277 | * lookup_metapath - Walk the metadata tree to a specific point |
278 | * @ip: The inode | 278 | * @ip: The inode |
279 | * @mp: The metapath | 279 | * @mp: The metapath |
280 | * | 280 | * |
281 | * Assumes that the inode's buffer has already been looked up and | 281 | * Assumes that the inode's buffer has already been looked up and |
282 | * hooked onto mp->mp_bh[0] and that the metapath has been initialised | 282 | * hooked onto mp->mp_bh[0] and that the metapath has been initialised |
283 | * by find_metapath(). | 283 | * by find_metapath(). |
284 | * | 284 | * |
285 | * If this function encounters part of the tree which has not been | 285 | * If this function encounters part of the tree which has not been |
286 | * allocated, it returns the current height of the tree at the point | 286 | * allocated, it returns the current height of the tree at the point |
287 | * at which it found the unallocated block. Blocks which are found are | 287 | * at which it found the unallocated block. Blocks which are found are |
288 | * added to the mp->mp_bh[] list. | 288 | * added to the mp->mp_bh[] list. |
289 | * | 289 | * |
290 | * Returns: error or height of metadata tree | 290 | * Returns: error or height of metadata tree |
291 | */ | 291 | */ |
292 | 292 | ||
293 | static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) | 293 | static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) |
294 | { | 294 | { |
295 | unsigned int end_of_metadata = ip->i_height - 1; | 295 | unsigned int end_of_metadata = ip->i_height - 1; |
296 | unsigned int x; | 296 | unsigned int x; |
297 | __be64 *ptr; | 297 | __be64 *ptr; |
298 | u64 dblock; | 298 | u64 dblock; |
299 | int ret; | 299 | int ret; |
300 | 300 | ||
301 | for (x = 0; x < end_of_metadata; x++) { | 301 | for (x = 0; x < end_of_metadata; x++) { |
302 | ptr = metapointer(x, mp); | 302 | ptr = metapointer(x, mp); |
303 | dblock = be64_to_cpu(*ptr); | 303 | dblock = be64_to_cpu(*ptr); |
304 | if (!dblock) | 304 | if (!dblock) |
305 | return x + 1; | 305 | return x + 1; |
306 | 306 | ||
307 | ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]); | 307 | ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]); |
308 | if (ret) | 308 | if (ret) |
309 | return ret; | 309 | return ret; |
310 | } | 310 | } |
311 | 311 | ||
312 | return ip->i_height; | 312 | return ip->i_height; |
313 | } | 313 | } |
314 | 314 | ||
315 | static inline void release_metapath(struct metapath *mp) | 315 | static inline void release_metapath(struct metapath *mp) |
316 | { | 316 | { |
317 | int i; | 317 | int i; |
318 | 318 | ||
319 | for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { | 319 | for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { |
320 | if (mp->mp_bh[i] == NULL) | 320 | if (mp->mp_bh[i] == NULL) |
321 | break; | 321 | break; |
322 | brelse(mp->mp_bh[i]); | 322 | brelse(mp->mp_bh[i]); |
323 | } | 323 | } |
324 | } | 324 | } |
325 | 325 | ||
326 | /** | 326 | /** |
327 | * gfs2_extent_length - Returns length of an extent of blocks | 327 | * gfs2_extent_length - Returns length of an extent of blocks |
328 | * @start: Start of the buffer | 328 | * @start: Start of the buffer |
329 | * @len: Length of the buffer in bytes | 329 | * @len: Length of the buffer in bytes |
330 | * @ptr: Current position in the buffer | 330 | * @ptr: Current position in the buffer |
331 | * @limit: Max extent length to return (0 = unlimited) | 331 | * @limit: Max extent length to return (0 = unlimited) |
332 | * @eob: Set to 1 if we hit "end of block" | 332 | * @eob: Set to 1 if we hit "end of block" |
333 | * | 333 | * |
334 | * If the first block is zero (unallocated) it will return the number of | 334 | * If the first block is zero (unallocated) it will return the number of |
335 | * unallocated blocks in the extent, otherwise it will return the number | 335 | * unallocated blocks in the extent, otherwise it will return the number |
336 | * of contiguous blocks in the extent. | 336 | * of contiguous blocks in the extent. |
337 | * | 337 | * |
338 | * Returns: The length of the extent (minimum of one block) | 338 | * Returns: The length of the extent (minimum of one block) |
339 | */ | 339 | */ |
340 | 340 | ||
341 | static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) | 341 | static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) |
342 | { | 342 | { |
343 | const __be64 *end = (start + len); | 343 | const __be64 *end = (start + len); |
344 | const __be64 *first = ptr; | 344 | const __be64 *first = ptr; |
345 | u64 d = be64_to_cpu(*ptr); | 345 | u64 d = be64_to_cpu(*ptr); |
346 | 346 | ||
347 | *eob = 0; | 347 | *eob = 0; |
348 | do { | 348 | do { |
349 | ptr++; | 349 | ptr++; |
350 | if (ptr >= end) | 350 | if (ptr >= end) |
351 | break; | 351 | break; |
352 | if (limit && --limit == 0) | 352 | if (limit && --limit == 0) |
353 | break; | 353 | break; |
354 | if (d) | 354 | if (d) |
355 | d++; | 355 | d++; |
356 | } while(be64_to_cpu(*ptr) == d); | 356 | } while(be64_to_cpu(*ptr) == d); |
357 | if (ptr >= end) | 357 | if (ptr >= end) |
358 | *eob = 1; | 358 | *eob = 1; |
359 | return (ptr - first); | 359 | return (ptr - first); |
360 | } | 360 | } |
361 | 361 | ||
362 | static inline void bmap_lock(struct gfs2_inode *ip, int create) | 362 | static inline void bmap_lock(struct gfs2_inode *ip, int create) |
363 | { | 363 | { |
364 | if (create) | 364 | if (create) |
365 | down_write(&ip->i_rw_mutex); | 365 | down_write(&ip->i_rw_mutex); |
366 | else | 366 | else |
367 | down_read(&ip->i_rw_mutex); | 367 | down_read(&ip->i_rw_mutex); |
368 | } | 368 | } |
369 | 369 | ||
370 | static inline void bmap_unlock(struct gfs2_inode *ip, int create) | 370 | static inline void bmap_unlock(struct gfs2_inode *ip, int create) |
371 | { | 371 | { |
372 | if (create) | 372 | if (create) |
373 | up_write(&ip->i_rw_mutex); | 373 | up_write(&ip->i_rw_mutex); |
374 | else | 374 | else |
375 | up_read(&ip->i_rw_mutex); | 375 | up_read(&ip->i_rw_mutex); |
376 | } | 376 | } |
377 | 377 | ||
378 | static inline __be64 *gfs2_indirect_init(struct metapath *mp, | 378 | static inline __be64 *gfs2_indirect_init(struct metapath *mp, |
379 | struct gfs2_glock *gl, unsigned int i, | 379 | struct gfs2_glock *gl, unsigned int i, |
380 | unsigned offset, u64 bn) | 380 | unsigned offset, u64 bn) |
381 | { | 381 | { |
382 | __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + | 382 | __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + |
383 | ((i > 1) ? sizeof(struct gfs2_meta_header) : | 383 | ((i > 1) ? sizeof(struct gfs2_meta_header) : |
384 | sizeof(struct gfs2_dinode))); | 384 | sizeof(struct gfs2_dinode))); |
385 | BUG_ON(i < 1); | 385 | BUG_ON(i < 1); |
386 | BUG_ON(mp->mp_bh[i] != NULL); | 386 | BUG_ON(mp->mp_bh[i] != NULL); |
387 | mp->mp_bh[i] = gfs2_meta_new(gl, bn); | 387 | mp->mp_bh[i] = gfs2_meta_new(gl, bn); |
388 | gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); | 388 | gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); |
389 | gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); | 389 | gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); |
390 | gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); | 390 | gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); |
391 | ptr += offset; | 391 | ptr += offset; |
392 | *ptr = cpu_to_be64(bn); | 392 | *ptr = cpu_to_be64(bn); |
393 | return ptr; | 393 | return ptr; |
394 | } | 394 | } |
395 | 395 | ||
/*
 * States of the block allocation state machine in gfs2_bmap_alloc(),
 * in the order they are passed through (HEIGHT -> DEPTH -> DATA).
 */
enum alloc_state {
	ALLOC_DATA = 0,		/* Tree complete, adding data blocks */
	ALLOC_GROW_DEPTH = 1,	/* Filling in lower part of the metadata tree */
	ALLOC_GROW_HEIGHT = 2,	/* Growing the height of the metadata tree */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
402 | 402 | ||
403 | /** | 403 | /** |
404 | * gfs2_bmap_alloc - Build a metadata tree of the requested height | 404 | * gfs2_bmap_alloc - Build a metadata tree of the requested height |
405 | * @inode: The GFS2 inode | 405 | * @inode: The GFS2 inode |
406 | * @lblock: The logical starting block of the extent | 406 | * @lblock: The logical starting block of the extent |
407 | * @bh_map: This is used to return the mapping details | 407 | * @bh_map: This is used to return the mapping details |
408 | * @mp: The metapath | 408 | * @mp: The metapath |
 * @sheight: The starting height (i.e. what's already mapped)
410 | * @height: The height to build to | 410 | * @height: The height to build to |
411 | * @maxlen: The max number of data blocks to alloc | 411 | * @maxlen: The max number of data blocks to alloc |
412 | * | 412 | * |
413 | * In this routine we may have to alloc: | 413 | * In this routine we may have to alloc: |
414 | * i) Indirect blocks to grow the metadata tree height | 414 | * i) Indirect blocks to grow the metadata tree height |
415 | * ii) Indirect blocks to fill in lower part of the metadata tree | 415 | * ii) Indirect blocks to fill in lower part of the metadata tree |
416 | * iii) Data blocks | 416 | * iii) Data blocks |
417 | * | 417 | * |
418 | * The function is in two parts. The first part works out the total | 418 | * The function is in two parts. The first part works out the total |
419 | * number of blocks which we need. The second part does the actual | 419 | * number of blocks which we need. The second part does the actual |
420 | * allocation asking for an extent at a time (if enough contiguous free | 420 | * allocation asking for an extent at a time (if enough contiguous free |
421 | * blocks are available, there will only be one request per bmap call) | 421 | * blocks are available, there will only be one request per bmap call) |
422 | * and uses the state machine to initialise the blocks in order. | 422 | * and uses the state machine to initialise the blocks in order. |
423 | * | 423 | * |
424 | * Returns: errno on error | 424 | * Returns: errno on error |
425 | */ | 425 | */ |
426 | 426 | ||
static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
			   struct buffer_head *bh_map, struct metapath *mp,
			   const unsigned int sheight,
			   const unsigned int height,
			   const unsigned int maxlen)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn, dblock = 0;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	unsigned dblks = 0;
	unsigned ptrs_per_blk;
	const unsigned end_of_metadata = height - 1;
	int eob = 0;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(sheight < 1);
	BUG_ON(dibh == NULL);

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	/* First part: work out how many data (dblks) and indirect (iblks)
	   blocks are needed, and which state to start the machine in */
	if (height == sheight) {
		struct buffer_head *bh;
		/* Bottom indirect block exists, find unalloced extent size */
		ptr = metapointer(end_of_metadata, mp);
		bh = mp->mp_bh[end_of_metadata];
		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen,
					   &eob);
		BUG_ON(dblks < 1);
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs;
		dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]);
		if (height == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = height - sheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = height - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (height - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	/* Each gfs2_alloc_block() call may return fewer blocks (n) than
	   requested, so loop until the data blocks have been mapped.
	   The switch cases deliberately fall through when n is still
	   non-zero, so one allocation can serve several states. */
	blks = dblks + iblks;
	i = sheight;
	do {
		int error;
		n = blks - alloced;
		error = gfs2_alloc_block(ip, &bn, &n);
		if (error)
			return error;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Save the dinode's first pointer before it
				   is pushed down into the new branch */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < height - ip->i_height && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == height - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < height; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
		/* fall through - blocks left over for the next stage */
		/* Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < height)
				gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1);
			for (; i < height && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == height)
				state = ALLOC_DATA;
			if (n == 0)
				break;
		/* fall through - blocks left over for the data stage */
		/* Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			dblock = bn;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while ((state != ALLOC_DATA) || !dblock);

	/* Commit the new height and block count, and report the mapping */
	ip->i_height = height;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
	map_bh(bh_map, inode->i_sb, dblock);
	bh_map->b_size = dblks << inode->i_blkbits;
	set_buffer_new(bh_map);
	return 0;
}
555 | 555 | ||
556 | /** | 556 | /** |
557 | * gfs2_block_map - Map a block from an inode to a disk block | 557 | * gfs2_block_map - Map a block from an inode to a disk block |
558 | * @inode: The inode | 558 | * @inode: The inode |
559 | * @lblock: The logical block number | 559 | * @lblock: The logical block number |
560 | * @bh_map: The bh to be mapped | 560 | * @bh_map: The bh to be mapped |
 * @create: True if it's ok to alloc blocks to satisfy the request
562 | * | 562 | * |
563 | * Sets buffer_mapped() if successful, sets buffer_boundary() if a | 563 | * Sets buffer_mapped() if successful, sets buffer_boundary() if a |
564 | * read of metadata will be required before the next block can be | 564 | * read of metadata will be required before the next block can be |
565 | * mapped. Sets buffer_new() if new blocks were allocated. | 565 | * mapped. Sets buffer_new() if new blocks were allocated. |
566 | * | 566 | * |
567 | * Returns: errno | 567 | * Returns: errno |
568 | */ | 568 | */ |
569 | 569 | ||
int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int bsize = sdp->sd_sb.sb_bsize;
	const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
	const u64 *arr = sdp->sd_heightsize;
	__be64 *ptr;
	u64 size;
	struct metapath mp;
	int ret;
	int eob;
	unsigned int len;
	struct buffer_head *bh;
	u8 height;

	BUG_ON(maxlen == 0);

	memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
	/* Shared lock for lookups, exclusive when we may allocate */
	bmap_lock(ip, create);
	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
	if (gfs2_is_dir(ip)) {
		/* Directories use the journaled block size / height tables */
		bsize = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;
	}

	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
	if (ret)
		goto out;

	/* Work out the tree height required to address this logical block */
	height = ip->i_height;
	size = (lblock + 1) * bsize;
	while (size > arr[height])
		height++;
	find_metapath(sdp, lblock, &mp, height);
	/* ret doubles as the already-allocated tree depth for do_alloc */
	ret = 1;
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;
	ret = lookup_metapath(ip, &mp);
	if (ret < 0)
		goto out;
	if (ret != ip->i_height)
		goto do_alloc;
	ptr = metapointer(ip->i_height - 1, &mp);
	if (*ptr == 0)
		goto do_alloc;
	/* Fully mapped: report the block and the contiguous extent length */
	map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr));
	bh = mp.mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob);
	bh_map->b_size = (len << inode->i_blkbits);
	if (eob)
		set_buffer_boundary(bh_map);
	ret = 0;
out:
	release_metapath(&mp);
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	bmap_unlock(ip, create);
	return ret;

do_alloc:
	/* All allocations are done here, firstly check create flag */
	if (!create) {
		BUG_ON(gfs2_is_stuffed(ip));
		ret = 0;
		goto out;
	}

	/* At this point ret is the tree depth of already allocated blocks */
	ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen);
	goto out;
}
645 | 645 | ||
646 | /* | 646 | /* |
647 | * Deprecated: do not use in new code | 647 | * Deprecated: do not use in new code |
648 | */ | 648 | */ |
649 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) | 649 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) |
650 | { | 650 | { |
651 | struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; | 651 | struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; |
652 | int ret; | 652 | int ret; |
653 | int create = *new; | 653 | int create = *new; |
654 | 654 | ||
655 | BUG_ON(!extlen); | 655 | BUG_ON(!extlen); |
656 | BUG_ON(!dblock); | 656 | BUG_ON(!dblock); |
657 | BUG_ON(!new); | 657 | BUG_ON(!new); |
658 | 658 | ||
659 | bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5)); | 659 | bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5)); |
660 | ret = gfs2_block_map(inode, lblock, &bh, create); | 660 | ret = gfs2_block_map(inode, lblock, &bh, create); |
661 | *extlen = bh.b_size >> inode->i_blkbits; | 661 | *extlen = bh.b_size >> inode->i_blkbits; |
662 | *dblock = bh.b_blocknr; | 662 | *dblock = bh.b_blocknr; |
663 | if (buffer_new(&bh)) | 663 | if (buffer_new(&bh)) |
664 | *new = 1; | 664 | *new = 1; |
665 | else | 665 | else |
666 | *new = 0; | 666 | *new = 0; |
667 | return ret; | 667 | return ret; |
668 | } | 668 | } |
669 | 669 | ||
670 | /** | 670 | /** |
671 | * recursive_scan - recursively scan through the end of a file | 671 | * recursive_scan - recursively scan through the end of a file |
672 | * @ip: the inode | 672 | * @ip: the inode |
673 | * @dibh: the dinode buffer | 673 | * @dibh: the dinode buffer |
674 | * @mp: the path through the metadata to the point to start | 674 | * @mp: the path through the metadata to the point to start |
675 | * @height: the height the recursion is at | 675 | * @height: the height the recursion is at |
676 | * @block: the indirect block to look at | 676 | * @block: the indirect block to look at |
677 | * @first: 1 if this is the first block | 677 | * @first: 1 if this is the first block |
678 | * @bc: the call to make for each piece of metadata | 678 | * @bc: the call to make for each piece of metadata |
679 | * @data: data opaque to this function to pass to @bc | 679 | * @data: data opaque to this function to pass to @bc |
680 | * | 680 | * |
681 | * When this is first called @height and @block should be zero and | 681 | * When this is first called @height and @block should be zero and |
682 | * @first should be 1. | 682 | * @first should be 1. |
683 | * | 683 | * |
684 | * Returns: errno | 684 | * Returns: errno |
685 | */ | 685 | */ |
686 | 686 | ||
static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
			  struct metapath *mp, unsigned int height,
			  u64 block, int first, block_call_t bc,
			  void *data)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *bh = NULL;
	__be64 *top, *bottom;
	u64 bn;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (!height) {
		/* Height 0: scan starts at the dinode's own pointer array */
		error = gfs2_meta_inode_buffer(ip, &bh);
		if (error)
			return error;
		dibh = bh;

		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
	} else {
		/* Read the indirect block for this level */
		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
		if (error)
			return error;

		/* Only the first block at each level starts mid-buffer;
		   the rest are scanned from their first pointer */
		top = (__be64 *)(bh->b_data + mh_size) +
				  (first ? mp->mp_list[height] : 0);

		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
	}

	/* Process this buffer before recursing into its children */
	error = bc(ip, dibh, bh, top, bottom, height, data);
	if (error)
		goto out;

	/* Recurse unless this is the last level of indirect blocks */
	if (height < ip->i_height - 1)
		for (; top < bottom; top++, first = 0) {
			if (!*top)
				continue;

			bn = be64_to_cpu(*top);

			error = recursive_scan(ip, dibh, mp, height + 1, bn,
					       first, bc, data);
			if (error)
				break;
		}

out:
	brelse(bh);
	return error;
}
739 | 739 | ||
740 | /** | 740 | /** |
 * do_strip - Look for a particular layer of the file and strip it off
742 | * @ip: the inode | 742 | * @ip: the inode |
743 | * @dibh: the dinode buffer | 743 | * @dibh: the dinode buffer |
744 | * @bh: A buffer of pointers | 744 | * @bh: A buffer of pointers |
745 | * @top: The first pointer in the buffer | 745 | * @top: The first pointer in the buffer |
746 | * @bottom: One more than the last pointer | 746 | * @bottom: One more than the last pointer |
747 | * @height: the height this buffer is at | 747 | * @height: the height this buffer is at |
748 | * @data: a pointer to a struct strip_mine | 748 | * @data: a pointer to a struct strip_mine |
749 | * | 749 | * |
750 | * Returns: errno | 750 | * Returns: errno |
751 | */ | 751 | */ |
752 | 752 | ||
753 | static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | 753 | static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, |
754 | struct buffer_head *bh, __be64 *top, __be64 *bottom, | 754 | struct buffer_head *bh, __be64 *top, __be64 *bottom, |
755 | unsigned int height, void *data) | 755 | unsigned int height, void *data) |
756 | { | 756 | { |
757 | struct strip_mine *sm = data; | 757 | struct strip_mine *sm = data; |
758 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 758 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
759 | struct gfs2_rgrp_list rlist; | 759 | struct gfs2_rgrp_list rlist; |
760 | u64 bn, bstart; | 760 | u64 bn, bstart; |
761 | u32 blen, btotal; | 761 | u32 blen, btotal; |
762 | __be64 *p; | 762 | __be64 *p; |
763 | unsigned int rg_blocks = 0; | 763 | unsigned int rg_blocks = 0; |
764 | int metadata; | 764 | int metadata; |
765 | unsigned int revokes = 0; | 765 | unsigned int revokes = 0; |
766 | int x; | 766 | int x; |
767 | int error = 0; | 767 | int error = 0; |
768 | 768 | ||
769 | if (!*top) | 769 | if (!*top) |
770 | sm->sm_first = 0; | 770 | sm->sm_first = 0; |
771 | 771 | ||
772 | if (height != sm->sm_height) | 772 | if (height != sm->sm_height) |
773 | return 0; | 773 | return 0; |
774 | 774 | ||
775 | if (sm->sm_first) { | 775 | if (sm->sm_first) { |
776 | top++; | 776 | top++; |
777 | sm->sm_first = 0; | 777 | sm->sm_first = 0; |
778 | } | 778 | } |
779 | 779 | ||
780 | metadata = (height != ip->i_height - 1); | 780 | metadata = (height != ip->i_height - 1); |
781 | if (metadata) | 781 | if (metadata) |
782 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; | 782 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; |
783 | else if (ip->i_depth) | 783 | else if (ip->i_depth) |
784 | revokes = sdp->sd_inptrs; | 784 | revokes = sdp->sd_inptrs; |
785 | 785 | ||
786 | if (ip != GFS2_I(sdp->sd_rindex)) | 786 | if (ip != GFS2_I(sdp->sd_rindex)) |
787 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); | 787 | error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); |
788 | else if (!sdp->sd_rgrps) | 788 | else if (!sdp->sd_rgrps) |
789 | error = gfs2_ri_update(ip); | 789 | error = gfs2_ri_update(ip); |
790 | 790 | ||
791 | if (error) | 791 | if (error) |
792 | return error; | 792 | return error; |
793 | 793 | ||
794 | memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); | 794 | memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); |
795 | bstart = 0; | 795 | bstart = 0; |
796 | blen = 0; | 796 | blen = 0; |
797 | 797 | ||
798 | for (p = top; p < bottom; p++) { | 798 | for (p = top; p < bottom; p++) { |
799 | if (!*p) | 799 | if (!*p) |
800 | continue; | 800 | continue; |
801 | 801 | ||
802 | bn = be64_to_cpu(*p); | 802 | bn = be64_to_cpu(*p); |
803 | 803 | ||
804 | if (bstart + blen == bn) | 804 | if (bstart + blen == bn) |
805 | blen++; | 805 | blen++; |
806 | else { | 806 | else { |
807 | if (bstart) | 807 | if (bstart) |
808 | gfs2_rlist_add(sdp, &rlist, bstart); | 808 | gfs2_rlist_add(sdp, &rlist, bstart); |
809 | 809 | ||
810 | bstart = bn; | 810 | bstart = bn; |
811 | blen = 1; | 811 | blen = 1; |
812 | } | 812 | } |
813 | } | 813 | } |
814 | 814 | ||
815 | if (bstart) | 815 | if (bstart) |
816 | gfs2_rlist_add(sdp, &rlist, bstart); | 816 | gfs2_rlist_add(sdp, &rlist, bstart); |
817 | else | 817 | else |
818 | goto out; /* Nothing to do */ | 818 | goto out; /* Nothing to do */ |
819 | 819 | ||
820 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); | 820 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
821 | 821 | ||
822 | for (x = 0; x < rlist.rl_rgrps; x++) { | 822 | for (x = 0; x < rlist.rl_rgrps; x++) { |
823 | struct gfs2_rgrpd *rgd; | 823 | struct gfs2_rgrpd *rgd; |
824 | rgd = rlist.rl_ghs[x].gh_gl->gl_object; | 824 | rgd = rlist.rl_ghs[x].gh_gl->gl_object; |
825 | rg_blocks += rgd->rd_length; | 825 | rg_blocks += rgd->rd_length; |
826 | } | 826 | } |
827 | 827 | ||
828 | error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); | 828 | error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); |
829 | if (error) | 829 | if (error) |
830 | goto out_rlist; | 830 | goto out_rlist; |
831 | 831 | ||
832 | error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + | 832 | error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + |
833 | RES_INDIRECT + RES_STATFS + RES_QUOTA, | 833 | RES_INDIRECT + RES_STATFS + RES_QUOTA, |
834 | revokes); | 834 | revokes); |
835 | if (error) | 835 | if (error) |
836 | goto out_rg_gunlock; | 836 | goto out_rg_gunlock; |
837 | 837 | ||
838 | down_write(&ip->i_rw_mutex); | 838 | down_write(&ip->i_rw_mutex); |
839 | 839 | ||
840 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 840 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
841 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 841 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
842 | 842 | ||
843 | bstart = 0; | 843 | bstart = 0; |
844 | blen = 0; | 844 | blen = 0; |
845 | btotal = 0; | 845 | btotal = 0; |
846 | 846 | ||
847 | for (p = top; p < bottom; p++) { | 847 | for (p = top; p < bottom; p++) { |
848 | if (!*p) | 848 | if (!*p) |
849 | continue; | 849 | continue; |
850 | 850 | ||
851 | bn = be64_to_cpu(*p); | 851 | bn = be64_to_cpu(*p); |
852 | 852 | ||
853 | if (bstart + blen == bn) | 853 | if (bstart + blen == bn) |
854 | blen++; | 854 | blen++; |
855 | else { | 855 | else { |
856 | if (bstart) { | 856 | if (bstart) { |
857 | if (metadata) | 857 | if (metadata) |
858 | __gfs2_free_meta(ip, bstart, blen); | 858 | __gfs2_free_meta(ip, bstart, blen); |
859 | else | 859 | else |
860 | __gfs2_free_data(ip, bstart, blen); | 860 | __gfs2_free_data(ip, bstart, blen); |
861 | 861 | ||
862 | btotal += blen; | 862 | btotal += blen; |
863 | } | 863 | } |
864 | 864 | ||
865 | bstart = bn; | 865 | bstart = bn; |
866 | blen = 1; | 866 | blen = 1; |
867 | } | 867 | } |
868 | 868 | ||
869 | *p = 0; | 869 | *p = 0; |
870 | gfs2_add_inode_blocks(&ip->i_inode, -1); | 870 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
871 | } | 871 | } |
872 | if (bstart) { | 872 | if (bstart) { |
873 | if (metadata) | 873 | if (metadata) |
874 | __gfs2_free_meta(ip, bstart, blen); | 874 | __gfs2_free_meta(ip, bstart, blen); |
875 | else | 875 | else |
876 | __gfs2_free_data(ip, bstart, blen); | 876 | __gfs2_free_data(ip, bstart, blen); |
877 | 877 | ||
878 | btotal += blen; | 878 | btotal += blen; |
879 | } | 879 | } |
880 | 880 | ||
881 | gfs2_statfs_change(sdp, 0, +btotal, 0); | 881 | gfs2_statfs_change(sdp, 0, +btotal, 0); |
882 | gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, | 882 | gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, |
883 | ip->i_inode.i_gid); | 883 | ip->i_inode.i_gid); |
884 | 884 | ||
885 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 885 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
886 | 886 | ||
887 | gfs2_dinode_out(ip, dibh->b_data); | 887 | gfs2_dinode_out(ip, dibh->b_data); |
888 | 888 | ||
889 | up_write(&ip->i_rw_mutex); | 889 | up_write(&ip->i_rw_mutex); |
890 | 890 | ||
891 | gfs2_trans_end(sdp); | 891 | gfs2_trans_end(sdp); |
892 | 892 | ||
893 | out_rg_gunlock: | 893 | out_rg_gunlock: |
894 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); | 894 | gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); |
895 | out_rlist: | 895 | out_rlist: |
896 | gfs2_rlist_free(&rlist); | 896 | gfs2_rlist_free(&rlist); |
897 | out: | 897 | out: |
898 | if (ip != GFS2_I(sdp->sd_rindex)) | 898 | if (ip != GFS2_I(sdp->sd_rindex)) |
899 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); | 899 | gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); |
900 | return error; | 900 | return error; |
901 | } | 901 | } |
902 | 902 | ||
/**
 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
 *
 * Zeroes the tail of the (partial) block containing @from so stale data
 * past the new EOF is not exposed.
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = grab_cache_page(mapping, index);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	/* Number of bytes to zero: from @offset to the end of its block. */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	/* Journal the change unless the inode is in writeback mode. */
	if (!gfs2_is_writeback(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
972 | 972 | ||
/*
 * trunc_start - begin a shrinking truncate
 * @inode: the inode being truncated
 * @oldsize: the current file size
 * @newsize: the requested (smaller) size
 *
 * Updates the on-disk size, zeroes any partial tail block, and for
 * non-stuffed files marks the dinode TRUNC_IN_PROG so an interrupted
 * truncate can be resumed (see gfs2_truncatei_resume).
 *
 * Returns: errno
 */
static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	struct buffer_head *dibh;
	int journaled = gfs2_is_jdata(ip);
	int error;

	/* One dinode block, plus one data block when journaled data is on. */
	error = gfs2_trans_begin(sdp,
				 RES_DINODE + (journaled ? RES_JDATA : 0), 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	if (gfs2_is_stuffed(ip)) {
		/* Data lives in the dinode itself: just clear the tail. */
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		/* Zero the partial block at the new EOF, if any. */
		if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
			error = gfs2_block_truncate_page(mapping, newsize);
			if (error)
				goto out_brelse;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_dinode_out(ip, dibh->b_data);

	truncate_pagecache(inode, oldsize, newsize);
out_brelse:
	brelse(dibh);
out:
	gfs2_trans_end(sdp);
	return error;
}
1015 | 1015 | ||
/*
 * trunc_dealloc - free the blocks beyond a given size
 * @ip: the inode being truncated
 * @size: the new file size (0 deallocates everything)
 *
 * Walks the metadata tree one height at a time, from the leaves up,
 * stripping off the block pointers past @size via do_strip().
 *
 * Returns: errno
 */
static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int height = ip->i_height;
	u64 lblock;
	struct metapath mp;
	int error;

	/* Logical block containing the last byte to keep. */
	if (!size)
		lblock = 0;
	else
		lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;

	find_metapath(sdp, lblock, &mp, ip->i_height);
	if (!gfs2_alloc_get(ip))
		return -ENOMEM;

	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	/* Strip the deepest layer first, then work back up the tree. */
	while (height--) {
		struct strip_mine sm;
		sm.sm_first = !!size;
		sm.sm_height = height;

		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
		if (error)
			break;
	}

	gfs2_quota_unhold(ip);

out:
	gfs2_alloc_put(ip);
	return error;
}
1053 | 1053 | ||
/*
 * trunc_end - finish a shrinking truncate
 * @ip: the inode being truncated
 *
 * Clears the TRUNC_IN_PROG flag set by trunc_start() and, if the file
 * is now empty, resets its height and allocation goal.
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		/* Empty file: collapse the tree and restart allocation
		   near the dinode itself. */
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
1087 | 1087 | ||
1088 | /** | 1088 | /** |
1089 | * do_shrink - make a file smaller | 1089 | * do_shrink - make a file smaller |
1090 | * @inode: the inode | 1090 | * @inode: the inode |
1091 | * @oldsize: the current inode size | 1091 | * @oldsize: the current inode size |
1092 | * @newsize: the size to make the file | 1092 | * @newsize: the size to make the file |
1093 | * | 1093 | * |
1094 | * Called with an exclusive lock on @inode. The @size must | 1094 | * Called with an exclusive lock on @inode. The @size must |
1095 | * be equal to or smaller than the current inode size. | 1095 | * be equal to or smaller than the current inode size. |
1096 | * | 1096 | * |
1097 | * Returns: errno | 1097 | * Returns: errno |
1098 | */ | 1098 | */ |
1099 | 1099 | ||
1100 | static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) | 1100 | static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) |
1101 | { | 1101 | { |
1102 | struct gfs2_inode *ip = GFS2_I(inode); | 1102 | struct gfs2_inode *ip = GFS2_I(inode); |
1103 | int error; | 1103 | int error; |
1104 | 1104 | ||
1105 | error = trunc_start(inode, oldsize, newsize); | 1105 | error = trunc_start(inode, oldsize, newsize); |
1106 | if (error < 0) | 1106 | if (error < 0) |
1107 | return error; | 1107 | return error; |
1108 | if (gfs2_is_stuffed(ip)) | 1108 | if (gfs2_is_stuffed(ip)) |
1109 | return 0; | 1109 | return 0; |
1110 | 1110 | ||
1111 | error = trunc_dealloc(ip, newsize); | 1111 | error = trunc_dealloc(ip, newsize); |
1112 | if (error == 0) | 1112 | if (error == 0) |
1113 | error = trunc_end(ip); | 1113 | error = trunc_end(ip); |
1114 | 1114 | ||
1115 | return error; | 1115 | return error; |
1116 | } | 1116 | } |
1117 | 1117 | ||
1118 | void gfs2_trim_blocks(struct inode *inode) | 1118 | void gfs2_trim_blocks(struct inode *inode) |
1119 | { | 1119 | { |
1120 | u64 size = inode->i_size; | 1120 | u64 size = inode->i_size; |
1121 | int ret; | 1121 | int ret; |
1122 | 1122 | ||
1123 | ret = do_shrink(inode, size, size); | 1123 | ret = do_shrink(inode, size, size); |
1124 | WARN_ON(ret != 0); | 1124 | WARN_ON(ret != 0); |
1125 | } | 1125 | } |
1126 | 1126 | ||
/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh;
	struct gfs2_alloc *al = NULL;	/* non-NULL iff we must unstuff */
	int error;

	/* The new size no longer fits in the dinode: reserve quota and
	   an allocation so the data can be moved to its own block. */
	if (gfs2_is_stuffed(ip) &&
	    (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
		al = gfs2_alloc_get(ip);
		if (al == NULL)
			return -ENOMEM;

		error = gfs2_quota_lock_check(ip);
		if (error)
			goto do_grow_alloc_put;

		al->al_requested = 1;
		error = gfs2_inplace_reserve(ip);
		if (error)
			goto do_grow_qunlock;
	}

	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0);
	if (error)
		goto do_grow_release;

	if (al) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	i_size_write(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

	/* Cleanup ladder; the inner labels only apply when al != NULL. */
do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	if (al) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
do_grow_alloc_put:
		gfs2_alloc_put(ip);
	}
	return error;
}
1203 | 1203 | ||
1204 | /** | 1204 | /** |
1205 | * gfs2_setattr_size - make a file a given size | 1205 | * gfs2_setattr_size - make a file a given size |
1206 | * @inode: the inode | 1206 | * @inode: the inode |
1207 | * @newsize: the size to make the file | 1207 | * @newsize: the size to make the file |
1208 | * | 1208 | * |
1209 | * The file size can grow, shrink, or stay the same size. This | 1209 | * The file size can grow, shrink, or stay the same size. This |
1210 | * is called holding i_mutex and an exclusive glock on the inode | 1210 | * is called holding i_mutex and an exclusive glock on the inode |
1211 | * in question. | 1211 | * in question. |
1212 | * | 1212 | * |
1213 | * Returns: errno | 1213 | * Returns: errno |
1214 | */ | 1214 | */ |
1215 | 1215 | ||
1216 | int gfs2_setattr_size(struct inode *inode, u64 newsize) | 1216 | int gfs2_setattr_size(struct inode *inode, u64 newsize) |
1217 | { | 1217 | { |
1218 | int ret; | 1218 | int ret; |
1219 | u64 oldsize; | 1219 | u64 oldsize; |
1220 | 1220 | ||
1221 | BUG_ON(!S_ISREG(inode->i_mode)); | 1221 | BUG_ON(!S_ISREG(inode->i_mode)); |
1222 | 1222 | ||
1223 | ret = inode_newsize_ok(inode, newsize); | 1223 | ret = inode_newsize_ok(inode, newsize); |
1224 | if (ret) | 1224 | if (ret) |
1225 | return ret; | 1225 | return ret; |
1226 | 1226 | ||
1227 | inode_dio_wait(inode); | ||
1228 | |||
1227 | oldsize = inode->i_size; | 1229 | oldsize = inode->i_size; |
1228 | if (newsize >= oldsize) | 1230 | if (newsize >= oldsize) |
1229 | return do_grow(inode, newsize); | 1231 | return do_grow(inode, newsize); |
1230 | 1232 | ||
1231 | return do_shrink(inode, oldsize, newsize); | 1233 | return do_shrink(inode, oldsize, newsize); |
1232 | } | 1234 | } |
1233 | 1235 | ||
1234 | int gfs2_truncatei_resume(struct gfs2_inode *ip) | 1236 | int gfs2_truncatei_resume(struct gfs2_inode *ip) |
1235 | { | 1237 | { |
1236 | int error; | 1238 | int error; |
1237 | error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); | 1239 | error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); |
1238 | if (!error) | 1240 | if (!error) |
1239 | error = trunc_end(ip); | 1241 | error = trunc_end(ip); |
1240 | return error; | 1242 | return error; |
1241 | } | 1243 | } |
1242 | 1244 | ||
/*
 * gfs2_file_dealloc - free every data and metadata block of a file
 * @ip: the inode to deallocate
 *
 * Equivalent to truncating the file down to size zero.
 *
 * Returns: errno
 */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	int error = trunc_dealloc(ip, 0);

	return error;
}
1247 | 1249 | ||
/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 *
 * Returns: 1 if an alloc is required, 0 otherwise
 */

int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head bh;	/* scratch buffer_head for block mapping */
	unsigned int shift;
	u64 lblock, lblock_stop, size;
	u64 end_of_file;

	/* A zero-length write never needs an allocation. */
	if (!len)
		return 0;

	/* Stuffed file: allocation needed only if the write overflows
	   the space available inside the dinode block. */
	if (gfs2_is_stuffed(ip)) {
		if (offset + len >
		    sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
			return 1;
		return 0;
	}

	shift = sdp->sd_sb.sb_bsize_shift;
	BUG_ON(gfs2_is_dir(ip));
	/* Writing past the last allocated block always needs an alloc. */
	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
	lblock = offset >> shift;
	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
	if (lblock_stop > end_of_file)
		return 1;

	/* Otherwise probe the block map: any unmapped extent in the
	   write range (a hole) requires an allocation. */
	size = (lblock_stop - lblock) << shift;
	do {
		bh.b_state = 0;
		bh.b_size = size;
		gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
		if (!buffer_mapped(&bh))
			return 1;
		/* b_size comes back as the mapped extent length; advance
		   past it. */
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	return 0;
}
1297 | 1299 | ||
1298 | 1300 |
fs/hfs/inode.c
1 | /* | 1 | /* |
2 | * linux/fs/hfs/inode.c | 2 | * linux/fs/hfs/inode.c |
3 | * | 3 | * |
4 | * Copyright (C) 1995-1997 Paul H. Hargrove | 4 | * Copyright (C) 1995-1997 Paul H. Hargrove |
5 | * (C) 2003 Ardis Technologies <roman@ardistech.com> | 5 | * (C) 2003 Ardis Technologies <roman@ardistech.com> |
6 | * This file may be distributed under the terms of the GNU General Public License. | 6 | * This file may be distributed under the terms of the GNU General Public License. |
7 | * | 7 | * |
8 | * This file contains inode-related functions which do not depend on | 8 | * This file contains inode-related functions which do not depend on |
9 | * which scheme is being used to represent forks. | 9 | * which scheme is being used to represent forks. |
10 | * | 10 | * |
11 | * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds | 11 | * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/mpage.h> | 15 | #include <linux/mpage.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | 17 | ||
18 | #include "hfs_fs.h" | 18 | #include "hfs_fs.h" |
19 | #include "btree.h" | 19 | #include "btree.h" |
20 | 20 | ||
21 | static const struct file_operations hfs_file_operations; | 21 | static const struct file_operations hfs_file_operations; |
22 | static const struct inode_operations hfs_file_inode_operations; | 22 | static const struct inode_operations hfs_file_inode_operations; |
23 | 23 | ||
24 | /*================ Variable-like macros ================*/ | 24 | /*================ Variable-like macros ================*/ |
25 | 25 | ||
26 | #define HFS_VALID_MODE_BITS (S_IFREG | S_IFDIR | S_IRWXUGO) | 26 | #define HFS_VALID_MODE_BITS (S_IFREG | S_IFDIR | S_IRWXUGO) |
27 | 27 | ||
/*
 * Write one dirty page through the generic buffer-head path;
 * hfs_get_block maps file blocks to on-disk blocks.
 */
static int hfs_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, hfs_get_block, wbc);
}
32 | 32 | ||
/*
 * Read one page through the generic buffer-head path using
 * hfs_get_block for the block mapping.
 */
static int hfs_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, hfs_get_block);
}
37 | 37 | ||
/*
 * Prepare a write.  cont_write_begin() zero-fills the range between the
 * current on-disk end of file (tracked in phys_size) and the write
 * position, since HFS does not support sparse files.  On failure, any
 * blocks instantiated beyond i_size by an extending write are trimmed
 * off again so the file stays consistent.
 */
static int hfs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;

	*pagep = NULL;
	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				hfs_get_block,
				&HFS_I(mapping->host)->phys_size);
	if (unlikely(ret)) {
		loff_t isize = mapping->host->i_size;
		/* drop blocks allocated past EOF by the failed attempt */
		if (pos + len > isize)
			vmtruncate(mapping->host, isize);
	}

	return ret;
}
56 | 56 | ||
/* FIBMAP support: translate a file block to a device sector. */
static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, hfs_get_block);
}
61 | 61 | ||
/*
 * Try to release a page belonging to one of the two special B-tree
 * inodes (extents or catalog tree).  The page may only be freed when no
 * btree node cached on it is still referenced; unreferenced nodes
 * covering the page are evicted from the bnode hash first, under
 * tree->hash_lock.
 */
static int hfs_releasepage(struct page *page, gfp_t mask)
{
	struct inode *inode = page->mapping->host;
	struct super_block *sb = inode->i_sb;
	struct hfs_btree *tree;
	struct hfs_bnode *node;
	u32 nidx;
	int i, res = 1;

	/* only the extents and catalog tree inodes use this page cache */
	switch (inode->i_ino) {
	case HFS_EXT_CNID:
		tree = HFS_SB(sb)->ext_tree;
		break;
	case HFS_CAT_CNID:
		tree = HFS_SB(sb)->cat_tree;
		break;
	default:
		BUG();
		return 0;
	}

	if (!tree)
		return 0;

	if (tree->node_size >= PAGE_CACHE_SIZE) {
		/* a node spans one or more whole pages: at most one node
		 * can cover this page */
		nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
		spin_lock(&tree->hash_lock);
		node = hfs_bnode_findhash(tree, nidx);
		if (!node)
			;
		else if (atomic_read(&node->refcnt))
			res = 0;	/* node still in use, keep the page */
		if (res && node) {
			hfs_bnode_unhash(node);
			hfs_bnode_free(node);
		}
		spin_unlock(&tree->hash_lock);
	} else {
		/* several nodes share this page: all of them must be
		 * unreferenced before the page can go */
		nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift);
		i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
		spin_lock(&tree->hash_lock);
		do {
			node = hfs_bnode_findhash(tree, nidx++);
			if (!node)
				continue;
			if (atomic_read(&node->refcnt)) {
				res = 0;
				break;
			}
			hfs_bnode_unhash(node);
			hfs_bnode_free(node);
		} while (--i && nidx < tree->node_count);
		spin_unlock(&tree->hash_lock);
	}
	return res ? try_to_free_buffers(page) : 0;
}
118 | 118 | ||
119 | static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, | 119 | static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, |
120 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 120 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) |
121 | { | 121 | { |
122 | struct file *file = iocb->ki_filp; | 122 | struct file *file = iocb->ki_filp; |
123 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; | 123 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; |
124 | ssize_t ret; | 124 | ssize_t ret; |
125 | 125 | ||
126 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 126 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
127 | offset, nr_segs, hfs_get_block, NULL); | 127 | offset, nr_segs, hfs_get_block, NULL); |
128 | 128 | ||
129 | /* | 129 | /* |
130 | * In case of error extending write may have instantiated a few | 130 | * In case of error extending write may have instantiated a few |
131 | * blocks outside i_size. Trim these off again. | 131 | * blocks outside i_size. Trim these off again. |
132 | */ | 132 | */ |
133 | if (unlikely((rw & WRITE) && ret < 0)) { | 133 | if (unlikely((rw & WRITE) && ret < 0)) { |
134 | loff_t isize = i_size_read(inode); | 134 | loff_t isize = i_size_read(inode); |
135 | loff_t end = offset + iov_length(iov, nr_segs); | 135 | loff_t end = offset + iov_length(iov, nr_segs); |
136 | 136 | ||
137 | if (end > isize) | 137 | if (end > isize) |
138 | vmtruncate(inode, isize); | 138 | vmtruncate(inode, isize); |
139 | } | 139 | } |
140 | 140 | ||
141 | return ret; | 141 | return ret; |
142 | } | 142 | } |
143 | 143 | ||
/* Writeback of multiple pages via the generic mpage helper. */
static int hfs_writepages(struct address_space *mapping,
			  struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, hfs_get_block);
}
149 | 149 | ||
/*
 * Address space operations for the special B-tree inodes (extents and
 * catalog).  No direct_IO/writepages here; releasepage coordinates page
 * freeing with the in-memory bnode cache.
 */
const struct address_space_operations hfs_btree_aops = {
	.readpage	= hfs_readpage,
	.writepage	= hfs_writepage,
	.write_begin	= hfs_write_begin,
	.write_end	= generic_write_end,
	.bmap		= hfs_bmap,
	.releasepage	= hfs_releasepage,
};
158 | 158 | ||
/* Address space operations for regular file data (and resource) forks. */
const struct address_space_operations hfs_aops = {
	.readpage	= hfs_readpage,
	.writepage	= hfs_writepage,
	.write_begin	= hfs_write_begin,
	.write_end	= generic_write_end,
	.bmap		= hfs_bmap,
	.direct_IO	= hfs_direct_IO,
	.writepages	= hfs_writepages,
};
168 | 168 | ||
/*
 * hfs_new_inode - allocate and initialize an in-core inode for a new
 * file or directory created under @dir with the given @name and @mode.
 *
 * Builds the catalog key, assigns the next unused CNID, applies the
 * mount-time umasks to @mode, bumps the per-volume counters and marks
 * the MDB dirty.  Returns NULL if inode allocation fails.
 */
struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode = new_inode(sb);
	if (!inode)
		return NULL;

	mutex_init(&HFS_I(inode)->extents_lock);
	INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
	hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
	inode->i_ino = HFS_SB(sb)->next_id++;
	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_nlink = 1;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
	HFS_I(inode)->flags = 0;
	HFS_I(inode)->rsrc_inode = NULL;
	HFS_I(inode)->fs_blocks = 0;
	if (S_ISDIR(mode)) {
		inode->i_size = 2;	/* "." and ".." */
		HFS_SB(sb)->folder_count++;
		if (dir->i_ino == HFS_ROOT_CNID)
			HFS_SB(sb)->root_dirs++;
		inode->i_op = &hfs_dir_inode_operations;
		inode->i_fop = &hfs_dir_operations;
		inode->i_mode |= S_IRWXUGO;
		inode->i_mode &= ~HFS_SB(inode->i_sb)->s_dir_umask;
	} else if (S_ISREG(mode)) {
		HFS_I(inode)->clump_blocks = HFS_SB(sb)->clumpablks;
		HFS_SB(sb)->file_count++;
		if (dir->i_ino == HFS_ROOT_CNID)
			HFS_SB(sb)->root_files++;
		inode->i_op = &hfs_file_inode_operations;
		inode->i_fop = &hfs_file_operations;
		inode->i_mapping->a_ops = &hfs_aops;
		/* everyone gets read/exec; write for all iff the owner has it */
		inode->i_mode |= S_IRUGO|S_IXUGO;
		if (mode & S_IWUSR)
			inode->i_mode |= S_IWUGO;
		inode->i_mode &= ~HFS_SB(inode->i_sb)->s_file_umask;
		/* empty data fork: no blocks allocated, no cached extents */
		HFS_I(inode)->phys_size = 0;
		HFS_I(inode)->alloc_blocks = 0;
		HFS_I(inode)->first_blocks = 0;
		HFS_I(inode)->cached_start = 0;
		HFS_I(inode)->cached_blocks = 0;
		memset(HFS_I(inode)->first_extents, 0, sizeof(hfs_extent_rec));
		memset(HFS_I(inode)->cached_extents, 0, sizeof(hfs_extent_rec));
	}
	insert_inode_hash(inode);
	mark_inode_dirty(inode);
	set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
	sb->s_dirt = 1;

	return inode;
}
227 | 227 | ||
228 | void hfs_delete_inode(struct inode *inode) | 228 | void hfs_delete_inode(struct inode *inode) |
229 | { | 229 | { |
230 | struct super_block *sb = inode->i_sb; | 230 | struct super_block *sb = inode->i_sb; |
231 | 231 | ||
232 | dprint(DBG_INODE, "delete_inode: %lu\n", inode->i_ino); | 232 | dprint(DBG_INODE, "delete_inode: %lu\n", inode->i_ino); |
233 | if (S_ISDIR(inode->i_mode)) { | 233 | if (S_ISDIR(inode->i_mode)) { |
234 | HFS_SB(sb)->folder_count--; | 234 | HFS_SB(sb)->folder_count--; |
235 | if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) | 235 | if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) |
236 | HFS_SB(sb)->root_dirs--; | 236 | HFS_SB(sb)->root_dirs--; |
237 | set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); | 237 | set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); |
238 | sb->s_dirt = 1; | 238 | sb->s_dirt = 1; |
239 | return; | 239 | return; |
240 | } | 240 | } |
241 | HFS_SB(sb)->file_count--; | 241 | HFS_SB(sb)->file_count--; |
242 | if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) | 242 | if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) |
243 | HFS_SB(sb)->root_files--; | 243 | HFS_SB(sb)->root_files--; |
244 | if (S_ISREG(inode->i_mode)) { | 244 | if (S_ISREG(inode->i_mode)) { |
245 | if (!inode->i_nlink) { | 245 | if (!inode->i_nlink) { |
246 | inode->i_size = 0; | 246 | inode->i_size = 0; |
247 | hfs_file_truncate(inode); | 247 | hfs_file_truncate(inode); |
248 | } | 248 | } |
249 | } | 249 | } |
250 | set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); | 250 | set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); |
251 | sb->s_dirt = 1; | 251 | sb->s_dirt = 1; |
252 | } | 252 | } |
253 | 253 | ||
254 | void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, | 254 | void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, |
255 | __be32 __log_size, __be32 phys_size, u32 clump_size) | 255 | __be32 __log_size, __be32 phys_size, u32 clump_size) |
256 | { | 256 | { |
257 | struct super_block *sb = inode->i_sb; | 257 | struct super_block *sb = inode->i_sb; |
258 | u32 log_size = be32_to_cpu(__log_size); | 258 | u32 log_size = be32_to_cpu(__log_size); |
259 | u16 count; | 259 | u16 count; |
260 | int i; | 260 | int i; |
261 | 261 | ||
262 | memcpy(HFS_I(inode)->first_extents, ext, sizeof(hfs_extent_rec)); | 262 | memcpy(HFS_I(inode)->first_extents, ext, sizeof(hfs_extent_rec)); |
263 | for (count = 0, i = 0; i < 3; i++) | 263 | for (count = 0, i = 0; i < 3; i++) |
264 | count += be16_to_cpu(ext[i].count); | 264 | count += be16_to_cpu(ext[i].count); |
265 | HFS_I(inode)->first_blocks = count; | 265 | HFS_I(inode)->first_blocks = count; |
266 | 266 | ||
267 | inode->i_size = HFS_I(inode)->phys_size = log_size; | 267 | inode->i_size = HFS_I(inode)->phys_size = log_size; |
268 | HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 268 | HFS_I(inode)->fs_blocks = (log_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
269 | inode_set_bytes(inode, HFS_I(inode)->fs_blocks << sb->s_blocksize_bits); | 269 | inode_set_bytes(inode, HFS_I(inode)->fs_blocks << sb->s_blocksize_bits); |
270 | HFS_I(inode)->alloc_blocks = be32_to_cpu(phys_size) / | 270 | HFS_I(inode)->alloc_blocks = be32_to_cpu(phys_size) / |
271 | HFS_SB(sb)->alloc_blksz; | 271 | HFS_SB(sb)->alloc_blksz; |
272 | HFS_I(inode)->clump_blocks = clump_size / HFS_SB(sb)->alloc_blksz; | 272 | HFS_I(inode)->clump_blocks = clump_size / HFS_SB(sb)->alloc_blksz; |
273 | if (!HFS_I(inode)->clump_blocks) | 273 | if (!HFS_I(inode)->clump_blocks) |
274 | HFS_I(inode)->clump_blocks = HFS_SB(sb)->clumpablks; | 274 | HFS_I(inode)->clump_blocks = HFS_SB(sb)->clumpablks; |
275 | } | 275 | } |
276 | 276 | ||
/*
 * Arguments passed through iget5_locked() to the test/set callbacks:
 * the catalog key (NULL when reading a resource fork) and the catalog
 * record to initialize/compare the inode from.
 */
struct hfs_iget_data {
	struct hfs_cat_key *key;
	hfs_cat_rec *rec;
};
281 | 281 | ||
282 | static int hfs_test_inode(struct inode *inode, void *data) | 282 | static int hfs_test_inode(struct inode *inode, void *data) |
283 | { | 283 | { |
284 | struct hfs_iget_data *idata = data; | 284 | struct hfs_iget_data *idata = data; |
285 | hfs_cat_rec *rec; | 285 | hfs_cat_rec *rec; |
286 | 286 | ||
287 | rec = idata->rec; | 287 | rec = idata->rec; |
288 | switch (rec->type) { | 288 | switch (rec->type) { |
289 | case HFS_CDR_DIR: | 289 | case HFS_CDR_DIR: |
290 | return inode->i_ino == be32_to_cpu(rec->dir.DirID); | 290 | return inode->i_ino == be32_to_cpu(rec->dir.DirID); |
291 | case HFS_CDR_FIL: | 291 | case HFS_CDR_FIL: |
292 | return inode->i_ino == be32_to_cpu(rec->file.FlNum); | 292 | return inode->i_ino == be32_to_cpu(rec->file.FlNum); |
293 | default: | 293 | default: |
294 | BUG(); | 294 | BUG(); |
295 | return 1; | 295 | return 1; |
296 | } | 296 | } |
297 | } | 297 | } |
298 | 298 | ||
/*
 * hfs_read_inode - iget5_locked() "set" callback.
 *
 * Fill a freshly allocated inode from the catalog record in @data.
 * When no catalog key is supplied the inode represents the resource
 * fork of a file (HFS_FLG_RSRC).  An unknown record type marks the
 * inode bad.  Always returns 0.
 */
static int hfs_read_inode(struct inode *inode, void *data)
{
	struct hfs_iget_data *idata = data;
	struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
	hfs_cat_rec *rec;

	HFS_I(inode)->flags = 0;
	HFS_I(inode)->rsrc_inode = NULL;
	mutex_init(&HFS_I(inode)->extents_lock);
	INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);

	/* Initialize the inode */
	inode->i_uid = hsb->s_uid;
	inode->i_gid = hsb->s_gid;
	inode->i_nlink = 1;

	if (idata->key)
		HFS_I(inode)->cat_key = *idata->key;
	else
		HFS_I(inode)->flags |= HFS_FLG_RSRC;	/* resource fork */
	HFS_I(inode)->tz_secondswest = sys_tz.tz_minuteswest * 60;

	rec = idata->rec;
	switch (rec->type) {
	case HFS_CDR_FIL:
		/* read the data fork or, for a resource inode, the
		 * resource fork fields of the same file record */
		if (!HFS_IS_RSRC(inode)) {
			hfs_inode_read_fork(inode, rec->file.ExtRec, rec->file.LgLen,
					    rec->file.PyLen, be16_to_cpu(rec->file.ClpSize));
		} else {
			hfs_inode_read_fork(inode, rec->file.RExtRec, rec->file.RLgLen,
					    rec->file.RPyLen, be16_to_cpu(rec->file.ClpSize));
		}

		inode->i_ino = be32_to_cpu(rec->file.FlNum);
		/* write permission mirrors the on-disk HFS lock flag */
		inode->i_mode = S_IRUGO | S_IXUGO;
		if (!(rec->file.Flags & HFS_FIL_LOCK))
			inode->i_mode |= S_IWUGO;
		inode->i_mode &= ~hsb->s_file_umask;
		inode->i_mode |= S_IFREG;
		inode->i_ctime = inode->i_atime = inode->i_mtime =
				hfs_m_to_utime(rec->file.MdDat);
		inode->i_op = &hfs_file_inode_operations;
		inode->i_fop = &hfs_file_operations;
		inode->i_mapping->a_ops = &hfs_aops;
		break;
	case HFS_CDR_DIR:
		inode->i_ino = be32_to_cpu(rec->dir.DirID);
		/* valence plus the implicit "." and ".." entries */
		inode->i_size = be16_to_cpu(rec->dir.Val) + 2;
		HFS_I(inode)->fs_blocks = 0;
		inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask);
		inode->i_ctime = inode->i_atime = inode->i_mtime =
				hfs_m_to_utime(rec->dir.MdDat);
		inode->i_op = &hfs_dir_inode_operations;
		inode->i_fop = &hfs_dir_operations;
		break;
	default:
		make_bad_inode(inode);
	}
	return 0;
}
362 | 362 | ||
363 | /* | 363 | /* |
364 | * __hfs_iget() | 364 | * __hfs_iget() |
365 | * | 365 | * |
366 | * Given the MDB for a HFS filesystem, a 'key' and an 'entry' in | 366 | * Given the MDB for a HFS filesystem, a 'key' and an 'entry' in |
367 | * the catalog B-tree and the 'type' of the desired file return the | 367 | * the catalog B-tree and the 'type' of the desired file return the |
368 | * inode for that file/directory or NULL. Note that 'type' indicates | 368 | * inode for that file/directory or NULL. Note that 'type' indicates |
369 | * whether we want the actual file or directory, or the corresponding | 369 | * whether we want the actual file or directory, or the corresponding |
370 | * metadata (AppleDouble header file or CAP metadata file). | 370 | * metadata (AppleDouble header file or CAP metadata file). |
371 | */ | 371 | */ |
372 | struct inode *hfs_iget(struct super_block *sb, struct hfs_cat_key *key, hfs_cat_rec *rec) | 372 | struct inode *hfs_iget(struct super_block *sb, struct hfs_cat_key *key, hfs_cat_rec *rec) |
373 | { | 373 | { |
374 | struct hfs_iget_data data = { key, rec }; | 374 | struct hfs_iget_data data = { key, rec }; |
375 | struct inode *inode; | 375 | struct inode *inode; |
376 | u32 cnid; | 376 | u32 cnid; |
377 | 377 | ||
378 | switch (rec->type) { | 378 | switch (rec->type) { |
379 | case HFS_CDR_DIR: | 379 | case HFS_CDR_DIR: |
380 | cnid = be32_to_cpu(rec->dir.DirID); | 380 | cnid = be32_to_cpu(rec->dir.DirID); |
381 | break; | 381 | break; |
382 | case HFS_CDR_FIL: | 382 | case HFS_CDR_FIL: |
383 | cnid = be32_to_cpu(rec->file.FlNum); | 383 | cnid = be32_to_cpu(rec->file.FlNum); |
384 | break; | 384 | break; |
385 | default: | 385 | default: |
386 | return NULL; | 386 | return NULL; |
387 | } | 387 | } |
388 | inode = iget5_locked(sb, cnid, hfs_test_inode, hfs_read_inode, &data); | 388 | inode = iget5_locked(sb, cnid, hfs_test_inode, hfs_read_inode, &data); |
389 | if (inode && (inode->i_state & I_NEW)) | 389 | if (inode && (inode->i_state & I_NEW)) |
390 | unlock_new_inode(inode); | 390 | unlock_new_inode(inode); |
391 | return inode; | 391 | return inode; |
392 | } | 392 | } |
393 | 393 | ||
/*
 * Copy in-core fork state into the on-disk catalog record fields: the
 * first extent record and, when the callers pass non-NULL pointers, the
 * logical and physical (allocated) sizes in big-endian form.
 */
void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext,
			  __be32 *log_size, __be32 *phys_size)
{
	memcpy(ext, HFS_I(inode)->first_extents, sizeof(hfs_extent_rec));

	if (log_size)
		*log_size = cpu_to_be32(inode->i_size);
	if (phys_size)
		*phys_size = cpu_to_be32(HFS_I(inode)->alloc_blocks *
				HFS_SB(inode->i_sb)->alloc_blksz);
}
405 | 405 | ||
/*
 * Write an inode's metadata back to its catalog record.
 *
 * For the special B-tree inodes this just flushes the whole tree.  For
 * normal files and directories the catalog entry is looked up and
 * rewritten in place; a resource-fork inode updates the resource-fork
 * fields of the record that belongs to its main (data fork) inode.
 */
int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct inode *main_inode = inode;
	struct hfs_find_data fd;
	hfs_cat_rec rec;

	dprint(DBG_INODE, "hfs_write_inode: %lu\n", inode->i_ino);
	hfs_ext_write_extent(inode);	/* flush the cached extent record */

	if (inode->i_ino < HFS_FIRSTUSER_CNID) {
		switch (inode->i_ino) {
		case HFS_ROOT_CNID:
			break;	/* root has a regular catalog record */
		case HFS_EXT_CNID:
			hfs_btree_write(HFS_SB(inode->i_sb)->ext_tree);
			return 0;
		case HFS_CAT_CNID:
			hfs_btree_write(HFS_SB(inode->i_sb)->cat_tree);
			return 0;
		default:
			BUG();
			return -EIO;
		}
	}

	if (HFS_IS_RSRC(inode))
		main_inode = HFS_I(inode)->rsrc_inode;

	/* the catalog record is gone once the last link is dropped */
	if (!main_inode->i_nlink)
		return 0;

	if (hfs_find_init(HFS_SB(main_inode->i_sb)->cat_tree, &fd))
		/* panic? */
		return -EIO;

	fd.search_key->cat = HFS_I(main_inode)->cat_key;
	if (hfs_brec_find(&fd))
		/* panic? */
		goto out;

	if (S_ISDIR(main_inode->i_mode)) {
		if (fd.entrylength < sizeof(struct hfs_cat_dir))
			/* panic? */;
		hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
			   sizeof(struct hfs_cat_dir));
		/* NOTE(review): a type/CNID mismatch is silently ignored here;
		 * the empty branch looks like a placeholder for error handling */
		if (rec.type != HFS_CDR_DIR ||
		    be32_to_cpu(rec.dir.DirID) != inode->i_ino) {
		}

		rec.dir.MdDat = hfs_u_to_mtime(inode->i_mtime);
		/* i_size includes "." and ".."; on-disk valence does not */
		rec.dir.Val = cpu_to_be16(inode->i_size - 2);

		hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
			    sizeof(struct hfs_cat_dir));
	} else if (HFS_IS_RSRC(inode)) {
		hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
			       sizeof(struct hfs_cat_file));
		hfs_inode_write_fork(inode, rec.file.RExtRec,
				     &rec.file.RLgLen, &rec.file.RPyLen);
		hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
			       sizeof(struct hfs_cat_file));
	} else {
		if (fd.entrylength < sizeof(struct hfs_cat_file))
			/* panic? */;
		hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
			   sizeof(struct hfs_cat_file));
		/* NOTE(review): mismatch silently ignored, as above */
		if (rec.type != HFS_CDR_FIL ||
		    be32_to_cpu(rec.file.FlNum) != inode->i_ino) {
		}

		/* the on-disk HFS lock flag mirrors the owner-write bit */
		if (inode->i_mode & S_IWUSR)
			rec.file.Flags &= ~HFS_FIL_LOCK;
		else
			rec.file.Flags |= HFS_FIL_LOCK;
		hfs_inode_write_fork(inode, rec.file.ExtRec, &rec.file.LgLen, &rec.file.PyLen);
		rec.file.MdDat = hfs_u_to_mtime(inode->i_mtime);

		hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
			    sizeof(struct hfs_cat_file));
	}
out:
	hfs_find_exit(&fd);
	return 0;
}
490 | 490 | ||
/*
 * Look up the "rsrc" pseudo-file of a regular HFS file.  Each HFS file
 * carries a data fork and a resource fork; the resource fork is exposed
 * as a child entry literally named "rsrc".  Any other name, or a lookup
 * inside a resource-fork inode itself, yields a negative dentry.
 */
static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
				      struct nameidata *nd)
{
	struct inode *inode = NULL;
	hfs_cat_rec rec;
	struct hfs_find_data fd;
	int res;

	/* only the name "rsrc" on a non-resource inode can match */
	if (HFS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc"))
		goto out;

	/* reuse the resource inode if it was instantiated before */
	inode = HFS_I(dir)->rsrc_inode;
	if (inode)
		goto out;

	inode = new_inode(dir->i_sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	/* re-read the file's catalog record to populate the new inode */
	hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
	fd.search_key->cat = HFS_I(dir)->cat_key;
	res = hfs_brec_read(&fd, &rec, sizeof(rec));
	if (!res) {
		struct hfs_iget_data idata = { NULL, &rec };
		hfs_read_inode(inode, &idata);
	}
	hfs_find_exit(&fd);
	if (res) {
		iput(inode);
		return ERR_PTR(res);
	}
	/* cross-link the forks; igrab() pins the data fork while the
	 * resource fork lives (dropped in hfs_evict_inode) */
	HFS_I(inode)->rsrc_inode = dir;
	HFS_I(dir)->rsrc_inode = inode;
	igrab(dir);
	/* fake-hash the inode so __mark_inode_dirty accepts it without
	 * putting it on the regular inode hash */
	hlist_add_fake(&inode->i_hash);
	mark_inode_dirty(inode);
out:
	d_add(dentry, inode);
	return NULL;
}
531 | 531 | ||
/*
 * Final teardown of an in-core inode: drop its page cache, end
 * writeback, and — for a resource-fork inode — sever the cross link to
 * its data fork and release the reference taken with igrab() in
 * hfs_file_lookup().
 */
void hfs_evict_inode(struct inode *inode)
{
	truncate_inode_pages(&inode->i_data, 0);
	end_writeback(inode);
	if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
		HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
		iput(HFS_I(inode)->rsrc_inode);
	}
}
541 | 541 | ||
542 | static int hfs_file_open(struct inode *inode, struct file *file) | 542 | static int hfs_file_open(struct inode *inode, struct file *file) |
543 | { | 543 | { |
544 | if (HFS_IS_RSRC(inode)) | 544 | if (HFS_IS_RSRC(inode)) |
545 | inode = HFS_I(inode)->rsrc_inode; | 545 | inode = HFS_I(inode)->rsrc_inode; |
546 | atomic_inc(&HFS_I(inode)->opencnt); | 546 | atomic_inc(&HFS_I(inode)->opencnt); |
547 | return 0; | 547 | return 0; |
548 | } | 548 | } |
549 | 549 | ||
/*
 * ->release: drop the open count taken in hfs_file_open().  When the
 * last opener goes away, call hfs_file_truncate() under i_mutex
 * (presumably to trim extents allocated beyond i_size — the actual
 * behavior lives in hfs_file_truncate; verify there).
 */
static int hfs_file_release(struct inode *inode, struct file *file)
{
	//struct super_block *sb = inode->i_sb;

	/* the data-fork inode owns the shared open count */
	if (HFS_IS_RSRC(inode))
		inode = HFS_I(inode)->rsrc_inode;
	if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
		mutex_lock(&inode->i_mutex);
		hfs_file_truncate(inode);
		//if (inode->i_flags & S_DEAD) {
		//	hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
		//	hfs_delete_inode(inode);
		//}
		mutex_unlock(&inode->i_mutex);
	}
	return 0;
}
567 | 567 | ||
/*
 * hfs_notify_change()
 *
 * Based very closely on fs/msdos/inode.c by Werner Almesberger
 *
 * This is the notify_change() field in the super_operations structure
 * for HFS file systems.  The purpose is to take the changes made to
 * an inode and apply them in a filesystem-dependent manner.  In this
 * case the process has a few tasks to do:
 * 1) prevent changes to the i_uid and i_gid fields.
 * 2) map file permissions to the closest allowable permissions
 * 3) Since multiple Linux files can share the same on-disk inode under
 *    HFS (for instance the data and resource forks of a file) a change
 *    to permissions must be applied to all other in-core inodes which
 *    correspond to the same HFS file.
 */
584 | 584 | ||
585 | int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) | 585 | int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) |
586 | { | 586 | { |
587 | struct inode *inode = dentry->d_inode; | 587 | struct inode *inode = dentry->d_inode; |
588 | struct hfs_sb_info *hsb = HFS_SB(inode->i_sb); | 588 | struct hfs_sb_info *hsb = HFS_SB(inode->i_sb); |
589 | int error; | 589 | int error; |
590 | 590 | ||
591 | error = inode_change_ok(inode, attr); /* basic permission checks */ | 591 | error = inode_change_ok(inode, attr); /* basic permission checks */ |
592 | if (error) | 592 | if (error) |
593 | return error; | 593 | return error; |
594 | 594 | ||
595 | /* no uig/gid changes and limit which mode bits can be set */ | 595 | /* no uig/gid changes and limit which mode bits can be set */ |
596 | if (((attr->ia_valid & ATTR_UID) && | 596 | if (((attr->ia_valid & ATTR_UID) && |
597 | (attr->ia_uid != hsb->s_uid)) || | 597 | (attr->ia_uid != hsb->s_uid)) || |
598 | ((attr->ia_valid & ATTR_GID) && | 598 | ((attr->ia_valid & ATTR_GID) && |
599 | (attr->ia_gid != hsb->s_gid)) || | 599 | (attr->ia_gid != hsb->s_gid)) || |
600 | ((attr->ia_valid & ATTR_MODE) && | 600 | ((attr->ia_valid & ATTR_MODE) && |
601 | ((S_ISDIR(inode->i_mode) && | 601 | ((S_ISDIR(inode->i_mode) && |
602 | (attr->ia_mode != inode->i_mode)) || | 602 | (attr->ia_mode != inode->i_mode)) || |
603 | (attr->ia_mode & ~HFS_VALID_MODE_BITS)))) { | 603 | (attr->ia_mode & ~HFS_VALID_MODE_BITS)))) { |
604 | return hsb->s_quiet ? 0 : error; | 604 | return hsb->s_quiet ? 0 : error; |
605 | } | 605 | } |
606 | 606 | ||
607 | if (attr->ia_valid & ATTR_MODE) { | 607 | if (attr->ia_valid & ATTR_MODE) { |
608 | /* Only the 'w' bits can ever change and only all together. */ | 608 | /* Only the 'w' bits can ever change and only all together. */ |
609 | if (attr->ia_mode & S_IWUSR) | 609 | if (attr->ia_mode & S_IWUSR) |
610 | attr->ia_mode = inode->i_mode | S_IWUGO; | 610 | attr->ia_mode = inode->i_mode | S_IWUGO; |
611 | else | 611 | else |
612 | attr->ia_mode = inode->i_mode & ~S_IWUGO; | 612 | attr->ia_mode = inode->i_mode & ~S_IWUGO; |
613 | attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; | 613 | attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; |
614 | } | 614 | } |
615 | 615 | ||
616 | if ((attr->ia_valid & ATTR_SIZE) && | 616 | if ((attr->ia_valid & ATTR_SIZE) && |
617 | attr->ia_size != i_size_read(inode)) { | 617 | attr->ia_size != i_size_read(inode)) { |
618 | inode_dio_wait(inode); | ||
619 | |||
618 | error = vmtruncate(inode, attr->ia_size); | 620 | error = vmtruncate(inode, attr->ia_size); |
619 | if (error) | 621 | if (error) |
620 | return error; | 622 | return error; |
621 | } | 623 | } |
622 | 624 | ||
623 | setattr_copy(inode, attr); | 625 | setattr_copy(inode, attr); |
624 | mark_inode_dirty(inode); | 626 | mark_inode_dirty(inode); |
625 | return 0; | 627 | return 0; |
626 | } | 628 | } |
627 | 629 | ||
/*
 * ->fsync: flush the inode, commit the dirty MDB (the HFS "superblock")
 * under lock_super, then flush the underlying block device.  The first
 * error encountered wins.
 */
static int hfs_file_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	struct super_block * sb;
	int ret, err;

	/* sync the inode to buffers */
	ret = write_inode_now(inode, 0);

	/* sync the superblock to buffers */
	sb = inode->i_sb;
	if (sb->s_dirt) {
		lock_super(sb);
		sb->s_dirt = 0;
		if (!(sb->s_flags & MS_RDONLY))
			hfs_mdb_commit(sb);
		unlock_super(sb);
	}
	/* .. finally sync the buffers to disk */
	err = sync_blockdev(sb->s_bdev);
	if (!ret)
		ret = err;	/* keep the earlier inode-sync error if any */
	return ret;
}
652 | 654 | ||
/* File operations for regular HFS files: generic page-cache I/O plus
 * open/release hooks that maintain the shared fork open count. */
static const struct file_operations hfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.write		= do_sync_write,
	.aio_write	= generic_file_aio_write,
	.mmap		= generic_file_mmap,
	.splice_read	= generic_file_splice_read,
	.fsync		= hfs_file_fsync,
	.open		= hfs_file_open,
	.release	= hfs_file_release,
};
665 | 667 | ||
/* Inode operations for regular HFS files; .lookup only serves the
 * "rsrc" resource-fork pseudo-entry. */
static const struct inode_operations hfs_file_inode_operations = {
	.lookup		= hfs_file_lookup,
	.truncate	= hfs_file_truncate,
	.setattr	= hfs_inode_setattr,
	.setxattr	= hfs_setxattr,
	.getxattr	= hfs_getxattr,
	.listxattr	= hfs_listxattr,
};
674 | 676 |
fs/hfsplus/inode.c
1 | /* | 1 | /* |
2 | * linux/fs/hfsplus/inode.c | 2 | * linux/fs/hfsplus/inode.c |
3 | * | 3 | * |
4 | * Copyright (C) 2001 | 4 | * Copyright (C) 2001 |
5 | * Brad Boyer (flar@allandria.com) | 5 | * Brad Boyer (flar@allandria.com) |
6 | * (C) 2003 Ardis Technologies <roman@ardistech.com> | 6 | * (C) 2003 Ardis Technologies <roman@ardistech.com> |
7 | * | 7 | * |
8 | * Inode handling routines | 8 | * Inode handling routines |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/blkdev.h> | 11 | #include <linux/blkdev.h> |
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/mpage.h> | 15 | #include <linux/mpage.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | 17 | ||
18 | #include "hfsplus_fs.h" | 18 | #include "hfsplus_fs.h" |
19 | #include "hfsplus_raw.h" | 19 | #include "hfsplus_raw.h" |
20 | 20 | ||
/* ->readpage: fill a page via the generic buffer-head reader using the
 * HFS+ block-mapping callback. */
static int hfsplus_readpage(struct file *file, struct page *page)
{
	return block_read_full_page(page, hfsplus_get_block);
}
25 | 25 | ||
/* ->writepage: write a page via the generic buffer-head writer using
 * the HFS+ block-mapping callback. */
static int hfsplus_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, hfsplus_get_block, wbc);
}
30 | 30 | ||
/*
 * ->write_begin built on cont_write_begin(), which zero-fills the gap
 * between the current on-disk size (phys_size) and the write position.
 * On failure, trim off any blocks an attempted extending write may have
 * instantiated beyond i_size.
 */
static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;

	*pagep = NULL;
	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				hfsplus_get_block,
				&HFSPLUS_I(mapping->host)->phys_size);
	if (unlikely(ret)) {
		loff_t isize = mapping->host->i_size;
		if (pos + len > isize)
			vmtruncate(mapping->host, isize);
	}

	return ret;
}
49 | 49 | ||
/* ->bmap: map a file block to a device sector via the generic helper. */
static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, hfsplus_get_block);
}
54 | 54 | ||
/*
 * ->releasepage for the btree metadata inodes (extent, catalog and
 * attribute trees).  A page may only be released if no cached bnode
 * built on it is still referenced; unreferenced bnodes found in the
 * tree's hash are unhashed and freed first.  Returns non-zero when the
 * page's buffers were successfully freed.
 */
static int hfsplus_releasepage(struct page *page, gfp_t mask)
{
	struct inode *inode = page->mapping->host;
	struct super_block *sb = inode->i_sb;
	struct hfs_btree *tree;
	struct hfs_bnode *node;
	u32 nidx;
	int i, res = 1;

	/* only the three btree inodes use this address space */
	switch (inode->i_ino) {
	case HFSPLUS_EXT_CNID:
		tree = HFSPLUS_SB(sb)->ext_tree;
		break;
	case HFSPLUS_CAT_CNID:
		tree = HFSPLUS_SB(sb)->cat_tree;
		break;
	case HFSPLUS_ATTR_CNID:
		tree = HFSPLUS_SB(sb)->attr_tree;
		break;
	default:
		BUG();
		return 0;
	}
	if (!tree)
		return 0;
	if (tree->node_size >= PAGE_CACHE_SIZE) {
		/* one bnode spans several pages: check the single node
		 * containing this page */
		nidx = page->index >>
			(tree->node_size_shift - PAGE_CACHE_SHIFT);
		spin_lock(&tree->hash_lock);
		node = hfs_bnode_findhash(tree, nidx);
		if (!node)
			;
		else if (atomic_read(&node->refcnt))
			res = 0;	/* node still in use: keep the page */
		if (res && node) {
			hfs_bnode_unhash(node);
			hfs_bnode_free(node);
		}
		spin_unlock(&tree->hash_lock);
	} else {
		/* several bnodes per page: all of them must be unused */
		nidx = page->index <<
			(PAGE_CACHE_SHIFT - tree->node_size_shift);
		i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
		spin_lock(&tree->hash_lock);
		do {
			node = hfs_bnode_findhash(tree, nidx++);
			if (!node)
				continue;
			if (atomic_read(&node->refcnt)) {
				res = 0;
				break;
			}
			hfs_bnode_unhash(node);
			hfs_bnode_free(node);
		} while (--i && nidx < tree->node_count);
		spin_unlock(&tree->hash_lock);
	}
	return res ? try_to_free_buffers(page) : 0;
}
114 | 114 | ||
/*
 * ->direct_IO: hand the request to the generic blockdev direct-I/O
 * path with the HFS+ block mapper.
 */
static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
	ssize_t ret;

	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				 offset, nr_segs, hfsplus_get_block, NULL);

	/*
	 * In case of error extending write may have instantiated a few
	 * blocks outside i_size. Trim these off again.
	 */
	if (unlikely((rw & WRITE) && ret < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = offset + iov_length(iov, nr_segs);

		if (end > isize)
			vmtruncate(inode, isize);
	}

	return ret;
}
139 | 139 | ||
/* ->writepages: batched writeback through the generic mpage path. */
static int hfsplus_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	return mpage_writepages(mapping, wbc, hfsplus_get_block);
}
145 | 145 | ||
/* Address-space ops for the btree metadata inodes; releasepage keeps
 * pages pinned while cached bnodes reference them. */
const struct address_space_operations hfsplus_btree_aops = {
	.readpage	= hfsplus_readpage,
	.writepage	= hfsplus_writepage,
	.write_begin	= hfsplus_write_begin,
	.write_end	= generic_write_end,
	.bmap		= hfsplus_bmap,
	.releasepage	= hfsplus_releasepage,
};
154 | 154 | ||
/* Address-space ops for regular HFS+ file data, including direct I/O. */
const struct address_space_operations hfsplus_aops = {
	.readpage	= hfsplus_readpage,
	.writepage	= hfsplus_writepage,
	.write_begin	= hfsplus_write_begin,
	.write_end	= generic_write_end,
	.bmap		= hfsplus_bmap,
	.direct_IO	= hfsplus_direct_IO,
	.writepages	= hfsplus_writepages,
};
164 | 164 | ||
/* Dentry ops implementing HFS+'s case-insensitive name hashing and
 * comparison. */
const struct dentry_operations hfsplus_dentry_operations = {
	.d_hash       = hfsplus_hash_dentry,
	.d_compare    = hfsplus_compare_dentry,
};
169 | 169 | ||
/*
 * Look up the "rsrc" pseudo-file of a regular HFS+ file, exposing the
 * file's resource fork as a child entry named "rsrc".  Any other name,
 * or a lookup inside a resource-fork inode, yields a negative dentry.
 */
static struct dentry *hfsplus_file_lookup(struct inode *dir,
		struct dentry *dentry, struct nameidata *nd)
{
	struct hfs_find_data fd;
	struct super_block *sb = dir->i_sb;
	struct inode *inode = NULL;
	struct hfsplus_inode_info *hip;
	int err;

	/* only the name "rsrc" on a non-resource inode can match */
	if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc"))
		goto out;

	/* reuse the resource inode if it was instantiated before */
	inode = HFSPLUS_I(dir)->rsrc_inode;
	if (inode)
		goto out;

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	/* shares the data fork's inode number; mark it as a rsrc fork */
	hip = HFSPLUS_I(inode);
	inode->i_ino = dir->i_ino;
	INIT_LIST_HEAD(&hip->open_dir_list);
	mutex_init(&hip->extents_lock);
	hip->extent_state = 0;
	hip->flags = 0;
	set_bit(HFSPLUS_I_RSRC, &hip->flags);

	/* re-read the catalog record to populate the new inode */
	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
	err = hfsplus_find_cat(sb, dir->i_ino, &fd);
	if (!err)
		err = hfsplus_cat_read_inode(inode, &fd);
	hfs_find_exit(&fd);
	if (err) {
		iput(inode);
		return ERR_PTR(err);
	}
	/* cross-link the forks; igrab() pins the data fork */
	hip->rsrc_inode = dir;
	HFSPLUS_I(dir)->rsrc_inode = inode;
	igrab(dir);

	/*
	 * __mark_inode_dirty expects inodes to be hashed. Since we don't
	 * want resource fork inodes in the regular inode space, we make them
	 * appear hashed, but do not put on any lists. hlist_del()
	 * will work fine and require no locking.
	 */
	hlist_add_fake(&inode->i_hash);

	mark_inode_dirty(inode);
out:
	d_add(dentry, inode);
	return NULL;
}
224 | 224 | ||
/*
 * Translate an on-disk HFS+ permission record into the in-core inode.
 * A zero uid/gid together with a zero mode presumably means the field
 * was never set by a POSIX-aware implementation, so fall back to the
 * mount-option defaults (uid/gid/umask).  Also map the HFS+ immutable
 * and append-only root flags onto the corresponding inode flags.
 */
static void hfsplus_get_perms(struct inode *inode,
		struct hfsplus_perm *perms, int dir)
{
	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
	u16 mode;

	mode = be16_to_cpu(perms->mode);

	inode->i_uid = be32_to_cpu(perms->owner);
	if (!inode->i_uid && !mode)
		inode->i_uid = sbi->uid;

	inode->i_gid = be32_to_cpu(perms->group);
	if (!inode->i_gid && !mode)
		inode->i_gid = sbi->gid;

	if (dir) {
		/* directories always get S_IFDIR, defaulting to rwx&~umask */
		mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask));
		mode |= S_IFDIR;
	} else if (!mode)
		mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask));
	inode->i_mode = mode;

	HFSPLUS_I(inode)->userflags = perms->userflags;
	if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;
	if (perms->rootflags & HFSPLUS_FLG_APPEND)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;
}
258 | 258 | ||
259 | static int hfsplus_file_open(struct inode *inode, struct file *file) | 259 | static int hfsplus_file_open(struct inode *inode, struct file *file) |
260 | { | 260 | { |
261 | if (HFSPLUS_IS_RSRC(inode)) | 261 | if (HFSPLUS_IS_RSRC(inode)) |
262 | inode = HFSPLUS_I(inode)->rsrc_inode; | 262 | inode = HFSPLUS_I(inode)->rsrc_inode; |
263 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | 263 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) |
264 | return -EOVERFLOW; | 264 | return -EOVERFLOW; |
265 | atomic_inc(&HFSPLUS_I(inode)->opencnt); | 265 | atomic_inc(&HFSPLUS_I(inode)->opencnt); |
266 | return 0; | 266 | return 0; |
267 | } | 267 | } |
268 | 268 | ||
/*
 * ->release: drop the open count taken in hfsplus_file_open().  On the
 * last close, truncate the allocation under i_mutex, and — if the file
 * was unlinked while open (S_DEAD) — remove its catalog entry from the
 * hidden directory and delete the on-disk inode.
 */
static int hfsplus_file_release(struct inode *inode, struct file *file)
{
	struct super_block *sb = inode->i_sb;

	/* the data-fork inode owns the shared open count */
	if (HFSPLUS_IS_RSRC(inode))
		inode = HFSPLUS_I(inode)->rsrc_inode;
	if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
		mutex_lock(&inode->i_mutex);
		hfsplus_file_truncate(inode);
		if (inode->i_flags & S_DEAD) {
			hfsplus_delete_cat(inode->i_ino,
					   HFSPLUS_SB(sb)->hidden_dir, NULL);
			hfsplus_delete_inode(inode);
		}
		mutex_unlock(&inode->i_mutex);
	}
	return 0;
}
287 | 287 | ||
/*
 * ->setattr for HFS+ inodes: validate the request, truncate when the
 * size changes, then copy the remaining attributes into the inode.
 */
static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;

	error = inode_change_ok(inode, attr);
	if (error)
		return error;

	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		/* wait for in-flight direct I/O before changing i_size */
		inode_dio_wait(inode);

		error = vmtruncate(inode, attr->ia_size);
		if (error)
			return error;
	}

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
}
308 | 310 | ||
/*
 * ->fsync: push the inode's metadata into the catalog/extent trees,
 * write out whichever btrees and the allocation bitmap this inode
 * dirtied (tracked via the HFSPLUS_I_*_DIRTY flags), and finally issue
 * a disk cache flush unless barriers are disabled.  The first error
 * encountered is returned.
 */
int hfsplus_file_fsync(struct file *file, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
	int error = 0, error2;

	/*
	 * Sync inode metadata into the catalog and extent trees.
	 */
	sync_inode_metadata(inode, 1);

	/*
	 * And explicitly write out the btrees.
	 */
	if (test_and_clear_bit(HFSPLUS_I_CAT_DIRTY, &hip->flags))
		error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping);

	if (test_and_clear_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags)) {
		error2 =
			filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
		if (!error)
			error = error2;
	}

	if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) {
		error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
		if (!error)
			error = error2;
	}

	/* make the writes durable on devices with volatile write caches */
	if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);

	return error;
}
345 | 347 | ||
346 | static const struct inode_operations hfsplus_file_inode_operations = { | 348 | static const struct inode_operations hfsplus_file_inode_operations = { |
347 | .lookup = hfsplus_file_lookup, | 349 | .lookup = hfsplus_file_lookup, |
348 | .truncate = hfsplus_file_truncate, | 350 | .truncate = hfsplus_file_truncate, |
349 | .setattr = hfsplus_setattr, | 351 | .setattr = hfsplus_setattr, |
350 | .setxattr = hfsplus_setxattr, | 352 | .setxattr = hfsplus_setxattr, |
351 | .getxattr = hfsplus_getxattr, | 353 | .getxattr = hfsplus_getxattr, |
352 | .listxattr = hfsplus_listxattr, | 354 | .listxattr = hfsplus_listxattr, |
353 | }; | 355 | }; |
354 | 356 | ||
355 | static const struct file_operations hfsplus_file_operations = { | 357 | static const struct file_operations hfsplus_file_operations = { |
356 | .llseek = generic_file_llseek, | 358 | .llseek = generic_file_llseek, |
357 | .read = do_sync_read, | 359 | .read = do_sync_read, |
358 | .aio_read = generic_file_aio_read, | 360 | .aio_read = generic_file_aio_read, |
359 | .write = do_sync_write, | 361 | .write = do_sync_write, |
360 | .aio_write = generic_file_aio_write, | 362 | .aio_write = generic_file_aio_write, |
361 | .mmap = generic_file_mmap, | 363 | .mmap = generic_file_mmap, |
362 | .splice_read = generic_file_splice_read, | 364 | .splice_read = generic_file_splice_read, |
363 | .fsync = hfsplus_file_fsync, | 365 | .fsync = hfsplus_file_fsync, |
364 | .open = hfsplus_file_open, | 366 | .open = hfsplus_file_open, |
365 | .release = hfsplus_file_release, | 367 | .release = hfsplus_file_release, |
366 | .unlocked_ioctl = hfsplus_ioctl, | 368 | .unlocked_ioctl = hfsplus_ioctl, |
367 | }; | 369 | }; |
368 | 370 | ||
369 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 371 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
370 | { | 372 | { |
371 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | 373 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); |
372 | struct inode *inode = new_inode(sb); | 374 | struct inode *inode = new_inode(sb); |
373 | struct hfsplus_inode_info *hip; | 375 | struct hfsplus_inode_info *hip; |
374 | 376 | ||
375 | if (!inode) | 377 | if (!inode) |
376 | return NULL; | 378 | return NULL; |
377 | 379 | ||
378 | inode->i_ino = sbi->next_cnid++; | 380 | inode->i_ino = sbi->next_cnid++; |
379 | inode->i_mode = mode; | 381 | inode->i_mode = mode; |
380 | inode->i_uid = current_fsuid(); | 382 | inode->i_uid = current_fsuid(); |
381 | inode->i_gid = current_fsgid(); | 383 | inode->i_gid = current_fsgid(); |
382 | inode->i_nlink = 1; | 384 | inode->i_nlink = 1; |
383 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 385 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
384 | 386 | ||
385 | hip = HFSPLUS_I(inode); | 387 | hip = HFSPLUS_I(inode); |
386 | INIT_LIST_HEAD(&hip->open_dir_list); | 388 | INIT_LIST_HEAD(&hip->open_dir_list); |
387 | mutex_init(&hip->extents_lock); | 389 | mutex_init(&hip->extents_lock); |
388 | atomic_set(&hip->opencnt, 0); | 390 | atomic_set(&hip->opencnt, 0); |
389 | hip->extent_state = 0; | 391 | hip->extent_state = 0; |
390 | hip->flags = 0; | 392 | hip->flags = 0; |
391 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); | 393 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); |
392 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); | 394 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
393 | hip->alloc_blocks = 0; | 395 | hip->alloc_blocks = 0; |
394 | hip->first_blocks = 0; | 396 | hip->first_blocks = 0; |
395 | hip->cached_start = 0; | 397 | hip->cached_start = 0; |
396 | hip->cached_blocks = 0; | 398 | hip->cached_blocks = 0; |
397 | hip->phys_size = 0; | 399 | hip->phys_size = 0; |
398 | hip->fs_blocks = 0; | 400 | hip->fs_blocks = 0; |
399 | hip->rsrc_inode = NULL; | 401 | hip->rsrc_inode = NULL; |
400 | if (S_ISDIR(inode->i_mode)) { | 402 | if (S_ISDIR(inode->i_mode)) { |
401 | inode->i_size = 2; | 403 | inode->i_size = 2; |
402 | sbi->folder_count++; | 404 | sbi->folder_count++; |
403 | inode->i_op = &hfsplus_dir_inode_operations; | 405 | inode->i_op = &hfsplus_dir_inode_operations; |
404 | inode->i_fop = &hfsplus_dir_operations; | 406 | inode->i_fop = &hfsplus_dir_operations; |
405 | } else if (S_ISREG(inode->i_mode)) { | 407 | } else if (S_ISREG(inode->i_mode)) { |
406 | sbi->file_count++; | 408 | sbi->file_count++; |
407 | inode->i_op = &hfsplus_file_inode_operations; | 409 | inode->i_op = &hfsplus_file_inode_operations; |
408 | inode->i_fop = &hfsplus_file_operations; | 410 | inode->i_fop = &hfsplus_file_operations; |
409 | inode->i_mapping->a_ops = &hfsplus_aops; | 411 | inode->i_mapping->a_ops = &hfsplus_aops; |
410 | hip->clump_blocks = sbi->data_clump_blocks; | 412 | hip->clump_blocks = sbi->data_clump_blocks; |
411 | } else if (S_ISLNK(inode->i_mode)) { | 413 | } else if (S_ISLNK(inode->i_mode)) { |
412 | sbi->file_count++; | 414 | sbi->file_count++; |
413 | inode->i_op = &page_symlink_inode_operations; | 415 | inode->i_op = &page_symlink_inode_operations; |
414 | inode->i_mapping->a_ops = &hfsplus_aops; | 416 | inode->i_mapping->a_ops = &hfsplus_aops; |
415 | hip->clump_blocks = 1; | 417 | hip->clump_blocks = 1; |
416 | } else | 418 | } else |
417 | sbi->file_count++; | 419 | sbi->file_count++; |
418 | insert_inode_hash(inode); | 420 | insert_inode_hash(inode); |
419 | mark_inode_dirty(inode); | 421 | mark_inode_dirty(inode); |
420 | sb->s_dirt = 1; | 422 | sb->s_dirt = 1; |
421 | 423 | ||
422 | return inode; | 424 | return inode; |
423 | } | 425 | } |
424 | 426 | ||
425 | void hfsplus_delete_inode(struct inode *inode) | 427 | void hfsplus_delete_inode(struct inode *inode) |
426 | { | 428 | { |
427 | struct super_block *sb = inode->i_sb; | 429 | struct super_block *sb = inode->i_sb; |
428 | 430 | ||
429 | if (S_ISDIR(inode->i_mode)) { | 431 | if (S_ISDIR(inode->i_mode)) { |
430 | HFSPLUS_SB(sb)->folder_count--; | 432 | HFSPLUS_SB(sb)->folder_count--; |
431 | sb->s_dirt = 1; | 433 | sb->s_dirt = 1; |
432 | return; | 434 | return; |
433 | } | 435 | } |
434 | HFSPLUS_SB(sb)->file_count--; | 436 | HFSPLUS_SB(sb)->file_count--; |
435 | if (S_ISREG(inode->i_mode)) { | 437 | if (S_ISREG(inode->i_mode)) { |
436 | if (!inode->i_nlink) { | 438 | if (!inode->i_nlink) { |
437 | inode->i_size = 0; | 439 | inode->i_size = 0; |
438 | hfsplus_file_truncate(inode); | 440 | hfsplus_file_truncate(inode); |
439 | } | 441 | } |
440 | } else if (S_ISLNK(inode->i_mode)) { | 442 | } else if (S_ISLNK(inode->i_mode)) { |
441 | inode->i_size = 0; | 443 | inode->i_size = 0; |
442 | hfsplus_file_truncate(inode); | 444 | hfsplus_file_truncate(inode); |
443 | } | 445 | } |
444 | sb->s_dirt = 1; | 446 | sb->s_dirt = 1; |
445 | } | 447 | } |
446 | 448 | ||
447 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 449 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
448 | { | 450 | { |
449 | struct super_block *sb = inode->i_sb; | 451 | struct super_block *sb = inode->i_sb; |
450 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | 452 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); |
451 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | 453 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); |
452 | u32 count; | 454 | u32 count; |
453 | int i; | 455 | int i; |
454 | 456 | ||
455 | memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); | 457 | memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); |
456 | for (count = 0, i = 0; i < 8; i++) | 458 | for (count = 0, i = 0; i < 8; i++) |
457 | count += be32_to_cpu(fork->extents[i].block_count); | 459 | count += be32_to_cpu(fork->extents[i].block_count); |
458 | hip->first_blocks = count; | 460 | hip->first_blocks = count; |
459 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); | 461 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
460 | hip->cached_start = 0; | 462 | hip->cached_start = 0; |
461 | hip->cached_blocks = 0; | 463 | hip->cached_blocks = 0; |
462 | 464 | ||
463 | hip->alloc_blocks = be32_to_cpu(fork->total_blocks); | 465 | hip->alloc_blocks = be32_to_cpu(fork->total_blocks); |
464 | hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); | 466 | hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); |
465 | hip->fs_blocks = | 467 | hip->fs_blocks = |
466 | (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 468 | (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
467 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); | 469 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
468 | hip->clump_blocks = | 470 | hip->clump_blocks = |
469 | be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; | 471 | be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; |
470 | if (!hip->clump_blocks) { | 472 | if (!hip->clump_blocks) { |
471 | hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? | 473 | hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? |
472 | sbi->rsrc_clump_blocks : | 474 | sbi->rsrc_clump_blocks : |
473 | sbi->data_clump_blocks; | 475 | sbi->data_clump_blocks; |
474 | } | 476 | } |
475 | } | 477 | } |
476 | 478 | ||
477 | void hfsplus_inode_write_fork(struct inode *inode, | 479 | void hfsplus_inode_write_fork(struct inode *inode, |
478 | struct hfsplus_fork_raw *fork) | 480 | struct hfsplus_fork_raw *fork) |
479 | { | 481 | { |
480 | memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, | 482 | memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, |
481 | sizeof(hfsplus_extent_rec)); | 483 | sizeof(hfsplus_extent_rec)); |
482 | fork->total_size = cpu_to_be64(inode->i_size); | 484 | fork->total_size = cpu_to_be64(inode->i_size); |
483 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); | 485 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); |
484 | } | 486 | } |
485 | 487 | ||
486 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | 488 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) |
487 | { | 489 | { |
488 | hfsplus_cat_entry entry; | 490 | hfsplus_cat_entry entry; |
489 | int res = 0; | 491 | int res = 0; |
490 | u16 type; | 492 | u16 type; |
491 | 493 | ||
492 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); | 494 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); |
493 | 495 | ||
494 | HFSPLUS_I(inode)->linkid = 0; | 496 | HFSPLUS_I(inode)->linkid = 0; |
495 | if (type == HFSPLUS_FOLDER) { | 497 | if (type == HFSPLUS_FOLDER) { |
496 | struct hfsplus_cat_folder *folder = &entry.folder; | 498 | struct hfsplus_cat_folder *folder = &entry.folder; |
497 | 499 | ||
498 | if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) | 500 | if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) |
499 | /* panic? */; | 501 | /* panic? */; |
500 | hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, | 502 | hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, |
501 | sizeof(struct hfsplus_cat_folder)); | 503 | sizeof(struct hfsplus_cat_folder)); |
502 | hfsplus_get_perms(inode, &folder->permissions, 1); | 504 | hfsplus_get_perms(inode, &folder->permissions, 1); |
503 | inode->i_nlink = 1; | 505 | inode->i_nlink = 1; |
504 | inode->i_size = 2 + be32_to_cpu(folder->valence); | 506 | inode->i_size = 2 + be32_to_cpu(folder->valence); |
505 | inode->i_atime = hfsp_mt2ut(folder->access_date); | 507 | inode->i_atime = hfsp_mt2ut(folder->access_date); |
506 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); | 508 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); |
507 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); | 509 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); |
508 | HFSPLUS_I(inode)->create_date = folder->create_date; | 510 | HFSPLUS_I(inode)->create_date = folder->create_date; |
509 | HFSPLUS_I(inode)->fs_blocks = 0; | 511 | HFSPLUS_I(inode)->fs_blocks = 0; |
510 | inode->i_op = &hfsplus_dir_inode_operations; | 512 | inode->i_op = &hfsplus_dir_inode_operations; |
511 | inode->i_fop = &hfsplus_dir_operations; | 513 | inode->i_fop = &hfsplus_dir_operations; |
512 | } else if (type == HFSPLUS_FILE) { | 514 | } else if (type == HFSPLUS_FILE) { |
513 | struct hfsplus_cat_file *file = &entry.file; | 515 | struct hfsplus_cat_file *file = &entry.file; |
514 | 516 | ||
515 | if (fd->entrylength < sizeof(struct hfsplus_cat_file)) | 517 | if (fd->entrylength < sizeof(struct hfsplus_cat_file)) |
516 | /* panic? */; | 518 | /* panic? */; |
517 | hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, | 519 | hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, |
518 | sizeof(struct hfsplus_cat_file)); | 520 | sizeof(struct hfsplus_cat_file)); |
519 | 521 | ||
520 | hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? | 522 | hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? |
521 | &file->rsrc_fork : &file->data_fork); | 523 | &file->rsrc_fork : &file->data_fork); |
522 | hfsplus_get_perms(inode, &file->permissions, 0); | 524 | hfsplus_get_perms(inode, &file->permissions, 0); |
523 | inode->i_nlink = 1; | 525 | inode->i_nlink = 1; |
524 | if (S_ISREG(inode->i_mode)) { | 526 | if (S_ISREG(inode->i_mode)) { |
525 | if (file->permissions.dev) | 527 | if (file->permissions.dev) |
526 | inode->i_nlink = | 528 | inode->i_nlink = |
527 | be32_to_cpu(file->permissions.dev); | 529 | be32_to_cpu(file->permissions.dev); |
528 | inode->i_op = &hfsplus_file_inode_operations; | 530 | inode->i_op = &hfsplus_file_inode_operations; |
529 | inode->i_fop = &hfsplus_file_operations; | 531 | inode->i_fop = &hfsplus_file_operations; |
530 | inode->i_mapping->a_ops = &hfsplus_aops; | 532 | inode->i_mapping->a_ops = &hfsplus_aops; |
531 | } else if (S_ISLNK(inode->i_mode)) { | 533 | } else if (S_ISLNK(inode->i_mode)) { |
532 | inode->i_op = &page_symlink_inode_operations; | 534 | inode->i_op = &page_symlink_inode_operations; |
533 | inode->i_mapping->a_ops = &hfsplus_aops; | 535 | inode->i_mapping->a_ops = &hfsplus_aops; |
534 | } else { | 536 | } else { |
535 | init_special_inode(inode, inode->i_mode, | 537 | init_special_inode(inode, inode->i_mode, |
536 | be32_to_cpu(file->permissions.dev)); | 538 | be32_to_cpu(file->permissions.dev)); |
537 | } | 539 | } |
538 | inode->i_atime = hfsp_mt2ut(file->access_date); | 540 | inode->i_atime = hfsp_mt2ut(file->access_date); |
539 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); | 541 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); |
540 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); | 542 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); |
541 | HFSPLUS_I(inode)->create_date = file->create_date; | 543 | HFSPLUS_I(inode)->create_date = file->create_date; |
542 | } else { | 544 | } else { |
543 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); | 545 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); |
544 | res = -EIO; | 546 | res = -EIO; |
545 | } | 547 | } |
546 | return res; | 548 | return res; |
547 | } | 549 | } |
548 | 550 | ||
549 | int hfsplus_cat_write_inode(struct inode *inode) | 551 | int hfsplus_cat_write_inode(struct inode *inode) |
550 | { | 552 | { |
551 | struct inode *main_inode = inode; | 553 | struct inode *main_inode = inode; |
552 | struct hfs_find_data fd; | 554 | struct hfs_find_data fd; |
553 | hfsplus_cat_entry entry; | 555 | hfsplus_cat_entry entry; |
554 | 556 | ||
555 | if (HFSPLUS_IS_RSRC(inode)) | 557 | if (HFSPLUS_IS_RSRC(inode)) |
556 | main_inode = HFSPLUS_I(inode)->rsrc_inode; | 558 | main_inode = HFSPLUS_I(inode)->rsrc_inode; |
557 | 559 | ||
558 | if (!main_inode->i_nlink) | 560 | if (!main_inode->i_nlink) |
559 | return 0; | 561 | return 0; |
560 | 562 | ||
561 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) | 563 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) |
562 | /* panic? */ | 564 | /* panic? */ |
563 | return -EIO; | 565 | return -EIO; |
564 | 566 | ||
565 | if (hfsplus_find_cat(main_inode->i_sb, main_inode->i_ino, &fd)) | 567 | if (hfsplus_find_cat(main_inode->i_sb, main_inode->i_ino, &fd)) |
566 | /* panic? */ | 568 | /* panic? */ |
567 | goto out; | 569 | goto out; |
568 | 570 | ||
569 | if (S_ISDIR(main_inode->i_mode)) { | 571 | if (S_ISDIR(main_inode->i_mode)) { |
570 | struct hfsplus_cat_folder *folder = &entry.folder; | 572 | struct hfsplus_cat_folder *folder = &entry.folder; |
571 | 573 | ||
572 | if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) | 574 | if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) |
573 | /* panic? */; | 575 | /* panic? */; |
574 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 576 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
575 | sizeof(struct hfsplus_cat_folder)); | 577 | sizeof(struct hfsplus_cat_folder)); |
576 | /* simple node checks? */ | 578 | /* simple node checks? */ |
577 | hfsplus_cat_set_perms(inode, &folder->permissions); | 579 | hfsplus_cat_set_perms(inode, &folder->permissions); |
578 | folder->access_date = hfsp_ut2mt(inode->i_atime); | 580 | folder->access_date = hfsp_ut2mt(inode->i_atime); |
579 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 581 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
580 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 582 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
581 | folder->valence = cpu_to_be32(inode->i_size - 2); | 583 | folder->valence = cpu_to_be32(inode->i_size - 2); |
582 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, | 584 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, |
583 | sizeof(struct hfsplus_cat_folder)); | 585 | sizeof(struct hfsplus_cat_folder)); |
584 | } else if (HFSPLUS_IS_RSRC(inode)) { | 586 | } else if (HFSPLUS_IS_RSRC(inode)) { |
585 | struct hfsplus_cat_file *file = &entry.file; | 587 | struct hfsplus_cat_file *file = &entry.file; |
586 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 588 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
587 | sizeof(struct hfsplus_cat_file)); | 589 | sizeof(struct hfsplus_cat_file)); |
588 | hfsplus_inode_write_fork(inode, &file->rsrc_fork); | 590 | hfsplus_inode_write_fork(inode, &file->rsrc_fork); |
589 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, | 591 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, |
590 | sizeof(struct hfsplus_cat_file)); | 592 | sizeof(struct hfsplus_cat_file)); |
591 | } else { | 593 | } else { |
592 | struct hfsplus_cat_file *file = &entry.file; | 594 | struct hfsplus_cat_file *file = &entry.file; |
593 | 595 | ||
594 | if (fd.entrylength < sizeof(struct hfsplus_cat_file)) | 596 | if (fd.entrylength < sizeof(struct hfsplus_cat_file)) |
595 | /* panic? */; | 597 | /* panic? */; |
596 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 598 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
597 | sizeof(struct hfsplus_cat_file)); | 599 | sizeof(struct hfsplus_cat_file)); |
598 | hfsplus_inode_write_fork(inode, &file->data_fork); | 600 | hfsplus_inode_write_fork(inode, &file->data_fork); |
599 | hfsplus_cat_set_perms(inode, &file->permissions); | 601 | hfsplus_cat_set_perms(inode, &file->permissions); |
600 | if (HFSPLUS_FLG_IMMUTABLE & | 602 | if (HFSPLUS_FLG_IMMUTABLE & |
601 | (file->permissions.rootflags | | 603 | (file->permissions.rootflags | |
602 | file->permissions.userflags)) | 604 | file->permissions.userflags)) |
603 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 605 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
604 | else | 606 | else |
605 | file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED); | 607 | file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED); |
606 | file->access_date = hfsp_ut2mt(inode->i_atime); | 608 | file->access_date = hfsp_ut2mt(inode->i_atime); |
607 | file->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 609 | file->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
608 | file->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 610 | file->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
609 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, | 611 | hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, |
610 | sizeof(struct hfsplus_cat_file)); | 612 | sizeof(struct hfsplus_cat_file)); |
611 | } | 613 | } |
612 | 614 | ||
613 | set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags); | 615 | set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags); |
614 | out: | 616 | out: |
615 | hfs_find_exit(&fd); | 617 | hfs_find_exit(&fd); |
616 | return 0; | 618 | return 0; |
617 | } | 619 | } |
618 | 620 |
fs/jfs/file.c
/*
 *   Copyright (C) International Business Machines Corp., 2000-2002
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
19 | 19 | ||
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/quotaops.h> | 22 | #include <linux/quotaops.h> |
23 | #include "jfs_incore.h" | 23 | #include "jfs_incore.h" |
24 | #include "jfs_inode.h" | 24 | #include "jfs_inode.h" |
25 | #include "jfs_dmap.h" | 25 | #include "jfs_dmap.h" |
26 | #include "jfs_txnmgr.h" | 26 | #include "jfs_txnmgr.h" |
27 | #include "jfs_xattr.h" | 27 | #include "jfs_xattr.h" |
28 | #include "jfs_acl.h" | 28 | #include "jfs_acl.h" |
29 | #include "jfs_debug.h" | 29 | #include "jfs_debug.h" |
30 | 30 | ||
31 | int jfs_fsync(struct file *file, int datasync) | 31 | int jfs_fsync(struct file *file, int datasync) |
32 | { | 32 | { |
33 | struct inode *inode = file->f_mapping->host; | 33 | struct inode *inode = file->f_mapping->host; |
34 | int rc = 0; | 34 | int rc = 0; |
35 | 35 | ||
36 | if (!(inode->i_state & I_DIRTY) || | 36 | if (!(inode->i_state & I_DIRTY) || |
37 | (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { | 37 | (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { |
38 | /* Make sure committed changes hit the disk */ | 38 | /* Make sure committed changes hit the disk */ |
39 | jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); | 39 | jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); |
40 | return rc; | 40 | return rc; |
41 | } | 41 | } |
42 | 42 | ||
43 | rc |= jfs_commit_inode(inode, 1); | 43 | rc |= jfs_commit_inode(inode, 1); |
44 | 44 | ||
45 | return rc ? -EIO : 0; | 45 | return rc ? -EIO : 0; |
46 | } | 46 | } |
47 | 47 | ||
48 | static int jfs_open(struct inode *inode, struct file *file) | 48 | static int jfs_open(struct inode *inode, struct file *file) |
49 | { | 49 | { |
50 | int rc; | 50 | int rc; |
51 | 51 | ||
52 | if ((rc = dquot_file_open(inode, file))) | 52 | if ((rc = dquot_file_open(inode, file))) |
53 | return rc; | 53 | return rc; |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * We attempt to allow only one "active" file open per aggregate | 56 | * We attempt to allow only one "active" file open per aggregate |
57 | * group. Otherwise, appending to files in parallel can cause | 57 | * group. Otherwise, appending to files in parallel can cause |
58 | * fragmentation within the files. | 58 | * fragmentation within the files. |
59 | * | 59 | * |
60 | * If the file is empty, it was probably just created and going | 60 | * If the file is empty, it was probably just created and going |
61 | * to be written to. If it has a size, we'll hold off until the | 61 | * to be written to. If it has a size, we'll hold off until the |
62 | * file is actually grown. | 62 | * file is actually grown. |
63 | */ | 63 | */ |
64 | if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE && | 64 | if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE && |
65 | (inode->i_size == 0)) { | 65 | (inode->i_size == 0)) { |
66 | struct jfs_inode_info *ji = JFS_IP(inode); | 66 | struct jfs_inode_info *ji = JFS_IP(inode); |
67 | spin_lock_irq(&ji->ag_lock); | 67 | spin_lock_irq(&ji->ag_lock); |
68 | if (ji->active_ag == -1) { | 68 | if (ji->active_ag == -1) { |
69 | struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); | 69 | struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); |
70 | ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); | 70 | ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); |
71 | atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); | 71 | atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); |
72 | } | 72 | } |
73 | spin_unlock_irq(&ji->ag_lock); | 73 | spin_unlock_irq(&ji->ag_lock); |
74 | } | 74 | } |
75 | 75 | ||
76 | return 0; | 76 | return 0; |
77 | } | 77 | } |
78 | static int jfs_release(struct inode *inode, struct file *file) | 78 | static int jfs_release(struct inode *inode, struct file *file) |
79 | { | 79 | { |
80 | struct jfs_inode_info *ji = JFS_IP(inode); | 80 | struct jfs_inode_info *ji = JFS_IP(inode); |
81 | 81 | ||
82 | spin_lock_irq(&ji->ag_lock); | 82 | spin_lock_irq(&ji->ag_lock); |
83 | if (ji->active_ag != -1) { | 83 | if (ji->active_ag != -1) { |
84 | struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; | 84 | struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap; |
85 | atomic_dec(&bmap->db_active[ji->active_ag]); | 85 | atomic_dec(&bmap->db_active[ji->active_ag]); |
86 | ji->active_ag = -1; | 86 | ji->active_ag = -1; |
87 | } | 87 | } |
88 | spin_unlock_irq(&ji->ag_lock); | 88 | spin_unlock_irq(&ji->ag_lock); |
89 | 89 | ||
90 | return 0; | 90 | return 0; |
91 | } | 91 | } |
92 | 92 | ||
93 | int jfs_setattr(struct dentry *dentry, struct iattr *iattr) | 93 | int jfs_setattr(struct dentry *dentry, struct iattr *iattr) |
94 | { | 94 | { |
95 | struct inode *inode = dentry->d_inode; | 95 | struct inode *inode = dentry->d_inode; |
96 | int rc; | 96 | int rc; |
97 | 97 | ||
98 | rc = inode_change_ok(inode, iattr); | 98 | rc = inode_change_ok(inode, iattr); |
99 | if (rc) | 99 | if (rc) |
100 | return rc; | 100 | return rc; |
101 | 101 | ||
102 | if (is_quota_modification(inode, iattr)) | 102 | if (is_quota_modification(inode, iattr)) |
103 | dquot_initialize(inode); | 103 | dquot_initialize(inode); |
104 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || | 104 | if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || |
105 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { | 105 | (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { |
106 | rc = dquot_transfer(inode, iattr); | 106 | rc = dquot_transfer(inode, iattr); |
107 | if (rc) | 107 | if (rc) |
108 | return rc; | 108 | return rc; |
109 | } | 109 | } |
110 | 110 | ||
111 | if ((iattr->ia_valid & ATTR_SIZE) && | 111 | if ((iattr->ia_valid & ATTR_SIZE) && |
112 | iattr->ia_size != i_size_read(inode)) { | 112 | iattr->ia_size != i_size_read(inode)) { |
113 | inode_dio_wait(inode); | ||
114 | |||
113 | rc = vmtruncate(inode, iattr->ia_size); | 115 | rc = vmtruncate(inode, iattr->ia_size); |
114 | if (rc) | 116 | if (rc) |
115 | return rc; | 117 | return rc; |
116 | } | 118 | } |
117 | 119 | ||
118 | setattr_copy(inode, iattr); | 120 | setattr_copy(inode, iattr); |
119 | mark_inode_dirty(inode); | 121 | mark_inode_dirty(inode); |
120 | 122 | ||
121 | if (iattr->ia_valid & ATTR_MODE) | 123 | if (iattr->ia_valid & ATTR_MODE) |
122 | rc = jfs_acl_chmod(inode); | 124 | rc = jfs_acl_chmod(inode); |
123 | return rc; | 125 | return rc; |
124 | } | 126 | } |
125 | 127 | ||
126 | const struct inode_operations jfs_file_inode_operations = { | 128 | const struct inode_operations jfs_file_inode_operations = { |
127 | .truncate = jfs_truncate, | 129 | .truncate = jfs_truncate, |
128 | .setxattr = jfs_setxattr, | 130 | .setxattr = jfs_setxattr, |
129 | .getxattr = jfs_getxattr, | 131 | .getxattr = jfs_getxattr, |
130 | .listxattr = jfs_listxattr, | 132 | .listxattr = jfs_listxattr, |
131 | .removexattr = jfs_removexattr, | 133 | .removexattr = jfs_removexattr, |
132 | .setattr = jfs_setattr, | 134 | .setattr = jfs_setattr, |
133 | #ifdef CONFIG_JFS_POSIX_ACL | 135 | #ifdef CONFIG_JFS_POSIX_ACL |
134 | .check_acl = jfs_check_acl, | 136 | .check_acl = jfs_check_acl, |
135 | #endif | 137 | #endif |
136 | }; | 138 | }; |
137 | 139 | ||
138 | const struct file_operations jfs_file_operations = { | 140 | const struct file_operations jfs_file_operations = { |
139 | .open = jfs_open, | 141 | .open = jfs_open, |
140 | .llseek = generic_file_llseek, | 142 | .llseek = generic_file_llseek, |
141 | .write = do_sync_write, | 143 | .write = do_sync_write, |
142 | .read = do_sync_read, | 144 | .read = do_sync_read, |
143 | .aio_read = generic_file_aio_read, | 145 | .aio_read = generic_file_aio_read, |
144 | .aio_write = generic_file_aio_write, | 146 | .aio_write = generic_file_aio_write, |
145 | .mmap = generic_file_mmap, | 147 | .mmap = generic_file_mmap, |
146 | .splice_read = generic_file_splice_read, | 148 | .splice_read = generic_file_splice_read, |
147 | .splice_write = generic_file_splice_write, | 149 | .splice_write = generic_file_splice_write, |
148 | .fsync = jfs_fsync, | 150 | .fsync = jfs_fsync, |
149 | .release = jfs_release, | 151 | .release = jfs_release, |
150 | .unlocked_ioctl = jfs_ioctl, | 152 | .unlocked_ioctl = jfs_ioctl, |
151 | #ifdef CONFIG_COMPAT | 153 | #ifdef CONFIG_COMPAT |
152 | .compat_ioctl = jfs_compat_ioctl, | 154 | .compat_ioctl = jfs_compat_ioctl, |
153 | #endif | 155 | #endif |
154 | }; | 156 | }; |
155 | 157 |
fs/nilfs2/inode.c
1 | /* | 1 | /* |
2 | * inode.c - NILFS inode operations. | 2 | * inode.c - NILFS inode operations. |
3 | * | 3 | * |
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | 4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. | 9 | * (at your option) any later version. |
10 | * | 10 | * |
11 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. | 14 | * GNU General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public License | 16 | * You should have received a copy of the GNU General Public License |
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
19 | * | 19 | * |
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | 20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/buffer_head.h> | 24 | #include <linux/buffer_head.h> |
25 | #include <linux/gfp.h> | 25 | #include <linux/gfp.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/uio.h> | 28 | #include <linux/uio.h> |
29 | #include "nilfs.h" | 29 | #include "nilfs.h" |
30 | #include "btnode.h" | 30 | #include "btnode.h" |
31 | #include "segment.h" | 31 | #include "segment.h" |
32 | #include "page.h" | 32 | #include "page.h" |
33 | #include "mdt.h" | 33 | #include "mdt.h" |
34 | #include "cpfile.h" | 34 | #include "cpfile.h" |
35 | #include "ifile.h" | 35 | #include "ifile.h" |
36 | 36 | ||
37 | struct nilfs_iget_args { | 37 | struct nilfs_iget_args { |
38 | u64 ino; | 38 | u64 ino; |
39 | __u64 cno; | 39 | __u64 cno; |
40 | struct nilfs_root *root; | 40 | struct nilfs_root *root; |
41 | int for_gc; | 41 | int for_gc; |
42 | }; | 42 | }; |
43 | 43 | ||
44 | void nilfs_inode_add_blocks(struct inode *inode, int n) | 44 | void nilfs_inode_add_blocks(struct inode *inode, int n) |
45 | { | 45 | { |
46 | struct nilfs_root *root = NILFS_I(inode)->i_root; | 46 | struct nilfs_root *root = NILFS_I(inode)->i_root; |
47 | 47 | ||
48 | inode_add_bytes(inode, (1 << inode->i_blkbits) * n); | 48 | inode_add_bytes(inode, (1 << inode->i_blkbits) * n); |
49 | if (root) | 49 | if (root) |
50 | atomic_add(n, &root->blocks_count); | 50 | atomic_add(n, &root->blocks_count); |
51 | } | 51 | } |
52 | 52 | ||
53 | void nilfs_inode_sub_blocks(struct inode *inode, int n) | 53 | void nilfs_inode_sub_blocks(struct inode *inode, int n) |
54 | { | 54 | { |
55 | struct nilfs_root *root = NILFS_I(inode)->i_root; | 55 | struct nilfs_root *root = NILFS_I(inode)->i_root; |
56 | 56 | ||
57 | inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); | 57 | inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); |
58 | if (root) | 58 | if (root) |
59 | atomic_sub(n, &root->blocks_count); | 59 | atomic_sub(n, &root->blocks_count); |
60 | } | 60 | } |
61 | 61 | ||
62 | /** | 62 | /** |
63 | * nilfs_get_block() - get a file block on the filesystem (callback function) | 63 | * nilfs_get_block() - get a file block on the filesystem (callback function) |
64 | * @inode - inode struct of the target file | 64 | * @inode - inode struct of the target file |
65 | * @blkoff - file block number | 65 | * @blkoff - file block number |
66 | * @bh_result - buffer head to be mapped on | 66 | * @bh_result - buffer head to be mapped on |
67 | * @create - indicate whether allocating the block or not when it has not | 67 | * @create - indicate whether allocating the block or not when it has not |
68 | * been allocated yet. | 68 | * been allocated yet. |
69 | * | 69 | * |
70 | * This function does not issue actual read request of the specified data | 70 | * This function does not issue actual read request of the specified data |
71 | * block. It is done by VFS. | 71 | * block. It is done by VFS. |
72 | */ | 72 | */ |
73 | int nilfs_get_block(struct inode *inode, sector_t blkoff, | 73 | int nilfs_get_block(struct inode *inode, sector_t blkoff, |
74 | struct buffer_head *bh_result, int create) | 74 | struct buffer_head *bh_result, int create) |
75 | { | 75 | { |
76 | struct nilfs_inode_info *ii = NILFS_I(inode); | 76 | struct nilfs_inode_info *ii = NILFS_I(inode); |
77 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; | 77 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
78 | __u64 blknum = 0; | 78 | __u64 blknum = 0; |
79 | int err = 0, ret; | 79 | int err = 0, ret; |
80 | unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; | 80 | unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; |
81 | 81 | ||
82 | down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); | 82 | down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
83 | ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); | 83 | ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); |
84 | up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); | 84 | up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); |
85 | if (ret >= 0) { /* found */ | 85 | if (ret >= 0) { /* found */ |
86 | map_bh(bh_result, inode->i_sb, blknum); | 86 | map_bh(bh_result, inode->i_sb, blknum); |
87 | if (ret > 0) | 87 | if (ret > 0) |
88 | bh_result->b_size = (ret << inode->i_blkbits); | 88 | bh_result->b_size = (ret << inode->i_blkbits); |
89 | goto out; | 89 | goto out; |
90 | } | 90 | } |
91 | /* data block was not found */ | 91 | /* data block was not found */ |
92 | if (ret == -ENOENT && create) { | 92 | if (ret == -ENOENT && create) { |
93 | struct nilfs_transaction_info ti; | 93 | struct nilfs_transaction_info ti; |
94 | 94 | ||
95 | bh_result->b_blocknr = 0; | 95 | bh_result->b_blocknr = 0; |
96 | err = nilfs_transaction_begin(inode->i_sb, &ti, 1); | 96 | err = nilfs_transaction_begin(inode->i_sb, &ti, 1); |
97 | if (unlikely(err)) | 97 | if (unlikely(err)) |
98 | goto out; | 98 | goto out; |
99 | err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, | 99 | err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, |
100 | (unsigned long)bh_result); | 100 | (unsigned long)bh_result); |
101 | if (unlikely(err != 0)) { | 101 | if (unlikely(err != 0)) { |
102 | if (err == -EEXIST) { | 102 | if (err == -EEXIST) { |
103 | /* | 103 | /* |
104 | * The get_block() function could be called | 104 | * The get_block() function could be called |
105 | * from multiple callers for an inode. | 105 | * from multiple callers for an inode. |
106 | * However, the page having this block must | 106 | * However, the page having this block must |
107 | * be locked in this case. | 107 | * be locked in this case. |
108 | */ | 108 | */ |
109 | printk(KERN_WARNING | 109 | printk(KERN_WARNING |
110 | "nilfs_get_block: a race condition " | 110 | "nilfs_get_block: a race condition " |
111 | "while inserting a data block. " | 111 | "while inserting a data block. " |
112 | "(inode number=%lu, file block " | 112 | "(inode number=%lu, file block " |
113 | "offset=%llu)\n", | 113 | "offset=%llu)\n", |
114 | inode->i_ino, | 114 | inode->i_ino, |
115 | (unsigned long long)blkoff); | 115 | (unsigned long long)blkoff); |
116 | err = 0; | 116 | err = 0; |
117 | } | 117 | } |
118 | nilfs_transaction_abort(inode->i_sb); | 118 | nilfs_transaction_abort(inode->i_sb); |
119 | goto out; | 119 | goto out; |
120 | } | 120 | } |
121 | nilfs_mark_inode_dirty(inode); | 121 | nilfs_mark_inode_dirty(inode); |
122 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | 122 | nilfs_transaction_commit(inode->i_sb); /* never fails */ |
123 | /* Error handling should be detailed */ | 123 | /* Error handling should be detailed */ |
124 | set_buffer_new(bh_result); | 124 | set_buffer_new(bh_result); |
125 | set_buffer_delay(bh_result); | 125 | set_buffer_delay(bh_result); |
126 | map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed | 126 | map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed |
127 | to proper value */ | 127 | to proper value */ |
128 | } else if (ret == -ENOENT) { | 128 | } else if (ret == -ENOENT) { |
129 | /* not found is not error (e.g. hole); must return without | 129 | /* not found is not error (e.g. hole); must return without |
130 | the mapped state flag. */ | 130 | the mapped state flag. */ |
131 | ; | 131 | ; |
132 | } else { | 132 | } else { |
133 | err = ret; | 133 | err = ret; |
134 | } | 134 | } |
135 | 135 | ||
136 | out: | 136 | out: |
137 | return err; | 137 | return err; |
138 | } | 138 | } |
139 | 139 | ||
140 | /** | 140 | /** |
141 | * nilfs_readpage() - implement readpage() method of nilfs_aops {} | 141 | * nilfs_readpage() - implement readpage() method of nilfs_aops {} |
142 | * address_space_operations. | 142 | * address_space_operations. |
143 | * @file - file struct of the file to be read | 143 | * @file - file struct of the file to be read |
144 | * @page - the page to be read | 144 | * @page - the page to be read |
145 | */ | 145 | */ |
146 | static int nilfs_readpage(struct file *file, struct page *page) | 146 | static int nilfs_readpage(struct file *file, struct page *page) |
147 | { | 147 | { |
148 | return mpage_readpage(page, nilfs_get_block); | 148 | return mpage_readpage(page, nilfs_get_block); |
149 | } | 149 | } |
150 | 150 | ||
151 | /** | 151 | /** |
152 | * nilfs_readpages() - implement readpages() method of nilfs_aops {} | 152 | * nilfs_readpages() - implement readpages() method of nilfs_aops {} |
153 | * address_space_operations. | 153 | * address_space_operations. |
154 | * @file - file struct of the file to be read | 154 | * @file - file struct of the file to be read |
155 | * @mapping - address_space struct used for reading multiple pages | 155 | * @mapping - address_space struct used for reading multiple pages |
156 | * @pages - the pages to be read | 156 | * @pages - the pages to be read |
157 | * @nr_pages - number of pages to be read | 157 | * @nr_pages - number of pages to be read |
158 | */ | 158 | */ |
159 | static int nilfs_readpages(struct file *file, struct address_space *mapping, | 159 | static int nilfs_readpages(struct file *file, struct address_space *mapping, |
160 | struct list_head *pages, unsigned nr_pages) | 160 | struct list_head *pages, unsigned nr_pages) |
161 | { | 161 | { |
162 | return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); | 162 | return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); |
163 | } | 163 | } |
164 | 164 | ||
165 | static int nilfs_writepages(struct address_space *mapping, | 165 | static int nilfs_writepages(struct address_space *mapping, |
166 | struct writeback_control *wbc) | 166 | struct writeback_control *wbc) |
167 | { | 167 | { |
168 | struct inode *inode = mapping->host; | 168 | struct inode *inode = mapping->host; |
169 | int err = 0; | 169 | int err = 0; |
170 | 170 | ||
171 | if (wbc->sync_mode == WB_SYNC_ALL) | 171 | if (wbc->sync_mode == WB_SYNC_ALL) |
172 | err = nilfs_construct_dsync_segment(inode->i_sb, inode, | 172 | err = nilfs_construct_dsync_segment(inode->i_sb, inode, |
173 | wbc->range_start, | 173 | wbc->range_start, |
174 | wbc->range_end); | 174 | wbc->range_end); |
175 | return err; | 175 | return err; |
176 | } | 176 | } |
177 | 177 | ||
178 | static int nilfs_writepage(struct page *page, struct writeback_control *wbc) | 178 | static int nilfs_writepage(struct page *page, struct writeback_control *wbc) |
179 | { | 179 | { |
180 | struct inode *inode = page->mapping->host; | 180 | struct inode *inode = page->mapping->host; |
181 | int err; | 181 | int err; |
182 | 182 | ||
183 | redirty_page_for_writepage(wbc, page); | 183 | redirty_page_for_writepage(wbc, page); |
184 | unlock_page(page); | 184 | unlock_page(page); |
185 | 185 | ||
186 | if (wbc->sync_mode == WB_SYNC_ALL) { | 186 | if (wbc->sync_mode == WB_SYNC_ALL) { |
187 | err = nilfs_construct_segment(inode->i_sb); | 187 | err = nilfs_construct_segment(inode->i_sb); |
188 | if (unlikely(err)) | 188 | if (unlikely(err)) |
189 | return err; | 189 | return err; |
190 | } else if (wbc->for_reclaim) | 190 | } else if (wbc->for_reclaim) |
191 | nilfs_flush_segment(inode->i_sb, inode->i_ino); | 191 | nilfs_flush_segment(inode->i_sb, inode->i_ino); |
192 | 192 | ||
193 | return 0; | 193 | return 0; |
194 | } | 194 | } |
195 | 195 | ||
196 | static int nilfs_set_page_dirty(struct page *page) | 196 | static int nilfs_set_page_dirty(struct page *page) |
197 | { | 197 | { |
198 | int ret = __set_page_dirty_buffers(page); | 198 | int ret = __set_page_dirty_buffers(page); |
199 | 199 | ||
200 | if (ret) { | 200 | if (ret) { |
201 | struct inode *inode = page->mapping->host; | 201 | struct inode *inode = page->mapping->host; |
202 | unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); | 202 | unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); |
203 | 203 | ||
204 | nilfs_set_file_dirty(inode, nr_dirty); | 204 | nilfs_set_file_dirty(inode, nr_dirty); |
205 | } | 205 | } |
206 | return ret; | 206 | return ret; |
207 | } | 207 | } |
208 | 208 | ||
209 | static int nilfs_write_begin(struct file *file, struct address_space *mapping, | 209 | static int nilfs_write_begin(struct file *file, struct address_space *mapping, |
210 | loff_t pos, unsigned len, unsigned flags, | 210 | loff_t pos, unsigned len, unsigned flags, |
211 | struct page **pagep, void **fsdata) | 211 | struct page **pagep, void **fsdata) |
212 | 212 | ||
213 | { | 213 | { |
214 | struct inode *inode = mapping->host; | 214 | struct inode *inode = mapping->host; |
215 | int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); | 215 | int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); |
216 | 216 | ||
217 | if (unlikely(err)) | 217 | if (unlikely(err)) |
218 | return err; | 218 | return err; |
219 | 219 | ||
220 | err = block_write_begin(mapping, pos, len, flags, pagep, | 220 | err = block_write_begin(mapping, pos, len, flags, pagep, |
221 | nilfs_get_block); | 221 | nilfs_get_block); |
222 | if (unlikely(err)) { | 222 | if (unlikely(err)) { |
223 | loff_t isize = mapping->host->i_size; | 223 | loff_t isize = mapping->host->i_size; |
224 | if (pos + len > isize) | 224 | if (pos + len > isize) |
225 | vmtruncate(mapping->host, isize); | 225 | vmtruncate(mapping->host, isize); |
226 | 226 | ||
227 | nilfs_transaction_abort(inode->i_sb); | 227 | nilfs_transaction_abort(inode->i_sb); |
228 | } | 228 | } |
229 | return err; | 229 | return err; |
230 | } | 230 | } |
231 | 231 | ||
232 | static int nilfs_write_end(struct file *file, struct address_space *mapping, | 232 | static int nilfs_write_end(struct file *file, struct address_space *mapping, |
233 | loff_t pos, unsigned len, unsigned copied, | 233 | loff_t pos, unsigned len, unsigned copied, |
234 | struct page *page, void *fsdata) | 234 | struct page *page, void *fsdata) |
235 | { | 235 | { |
236 | struct inode *inode = mapping->host; | 236 | struct inode *inode = mapping->host; |
237 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); | 237 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); |
238 | unsigned nr_dirty; | 238 | unsigned nr_dirty; |
239 | int err; | 239 | int err; |
240 | 240 | ||
241 | nr_dirty = nilfs_page_count_clean_buffers(page, start, | 241 | nr_dirty = nilfs_page_count_clean_buffers(page, start, |
242 | start + copied); | 242 | start + copied); |
243 | copied = generic_write_end(file, mapping, pos, len, copied, page, | 243 | copied = generic_write_end(file, mapping, pos, len, copied, page, |
244 | fsdata); | 244 | fsdata); |
245 | nilfs_set_file_dirty(inode, nr_dirty); | 245 | nilfs_set_file_dirty(inode, nr_dirty); |
246 | err = nilfs_transaction_commit(inode->i_sb); | 246 | err = nilfs_transaction_commit(inode->i_sb); |
247 | return err ? : copied; | 247 | return err ? : copied; |
248 | } | 248 | } |
249 | 249 | ||
250 | static ssize_t | 250 | static ssize_t |
251 | nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 251 | nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, |
252 | loff_t offset, unsigned long nr_segs) | 252 | loff_t offset, unsigned long nr_segs) |
253 | { | 253 | { |
254 | struct file *file = iocb->ki_filp; | 254 | struct file *file = iocb->ki_filp; |
255 | struct inode *inode = file->f_mapping->host; | 255 | struct inode *inode = file->f_mapping->host; |
256 | ssize_t size; | 256 | ssize_t size; |
257 | 257 | ||
258 | if (rw == WRITE) | 258 | if (rw == WRITE) |
259 | return 0; | 259 | return 0; |
260 | 260 | ||
261 | /* Needs synchronization with the cleaner */ | 261 | /* Needs synchronization with the cleaner */ |
262 | size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 262 | size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
263 | offset, nr_segs, nilfs_get_block, NULL); | 263 | offset, nr_segs, nilfs_get_block, NULL); |
264 | 264 | ||
265 | /* | 265 | /* |
266 | * In case of error extending write may have instantiated a few | 266 | * In case of error extending write may have instantiated a few |
267 | * blocks outside i_size. Trim these off again. | 267 | * blocks outside i_size. Trim these off again. |
268 | */ | 268 | */ |
269 | if (unlikely((rw & WRITE) && size < 0)) { | 269 | if (unlikely((rw & WRITE) && size < 0)) { |
270 | loff_t isize = i_size_read(inode); | 270 | loff_t isize = i_size_read(inode); |
271 | loff_t end = offset + iov_length(iov, nr_segs); | 271 | loff_t end = offset + iov_length(iov, nr_segs); |
272 | 272 | ||
273 | if (end > isize) | 273 | if (end > isize) |
274 | vmtruncate(inode, isize); | 274 | vmtruncate(inode, isize); |
275 | } | 275 | } |
276 | 276 | ||
277 | return size; | 277 | return size; |
278 | } | 278 | } |
279 | 279 | ||
280 | const struct address_space_operations nilfs_aops = { | 280 | const struct address_space_operations nilfs_aops = { |
281 | .writepage = nilfs_writepage, | 281 | .writepage = nilfs_writepage, |
282 | .readpage = nilfs_readpage, | 282 | .readpage = nilfs_readpage, |
283 | .writepages = nilfs_writepages, | 283 | .writepages = nilfs_writepages, |
284 | .set_page_dirty = nilfs_set_page_dirty, | 284 | .set_page_dirty = nilfs_set_page_dirty, |
285 | .readpages = nilfs_readpages, | 285 | .readpages = nilfs_readpages, |
286 | .write_begin = nilfs_write_begin, | 286 | .write_begin = nilfs_write_begin, |
287 | .write_end = nilfs_write_end, | 287 | .write_end = nilfs_write_end, |
288 | /* .releasepage = nilfs_releasepage, */ | 288 | /* .releasepage = nilfs_releasepage, */ |
289 | .invalidatepage = block_invalidatepage, | 289 | .invalidatepage = block_invalidatepage, |
290 | .direct_IO = nilfs_direct_IO, | 290 | .direct_IO = nilfs_direct_IO, |
291 | .is_partially_uptodate = block_is_partially_uptodate, | 291 | .is_partially_uptodate = block_is_partially_uptodate, |
292 | }; | 292 | }; |
293 | 293 | ||
294 | struct inode *nilfs_new_inode(struct inode *dir, int mode) | 294 | struct inode *nilfs_new_inode(struct inode *dir, int mode) |
295 | { | 295 | { |
296 | struct super_block *sb = dir->i_sb; | 296 | struct super_block *sb = dir->i_sb; |
297 | struct the_nilfs *nilfs = sb->s_fs_info; | 297 | struct the_nilfs *nilfs = sb->s_fs_info; |
298 | struct inode *inode; | 298 | struct inode *inode; |
299 | struct nilfs_inode_info *ii; | 299 | struct nilfs_inode_info *ii; |
300 | struct nilfs_root *root; | 300 | struct nilfs_root *root; |
301 | int err = -ENOMEM; | 301 | int err = -ENOMEM; |
302 | ino_t ino; | 302 | ino_t ino; |
303 | 303 | ||
304 | inode = new_inode(sb); | 304 | inode = new_inode(sb); |
305 | if (unlikely(!inode)) | 305 | if (unlikely(!inode)) |
306 | goto failed; | 306 | goto failed; |
307 | 307 | ||
308 | mapping_set_gfp_mask(inode->i_mapping, | 308 | mapping_set_gfp_mask(inode->i_mapping, |
309 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | 309 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); |
310 | 310 | ||
311 | root = NILFS_I(dir)->i_root; | 311 | root = NILFS_I(dir)->i_root; |
312 | ii = NILFS_I(inode); | 312 | ii = NILFS_I(inode); |
313 | ii->i_state = 1 << NILFS_I_NEW; | 313 | ii->i_state = 1 << NILFS_I_NEW; |
314 | ii->i_root = root; | 314 | ii->i_root = root; |
315 | 315 | ||
316 | err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); | 316 | err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); |
317 | if (unlikely(err)) | 317 | if (unlikely(err)) |
318 | goto failed_ifile_create_inode; | 318 | goto failed_ifile_create_inode; |
319 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ | 319 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
320 | 320 | ||
321 | atomic_inc(&root->inodes_count); | 321 | atomic_inc(&root->inodes_count); |
322 | inode_init_owner(inode, dir, mode); | 322 | inode_init_owner(inode, dir, mode); |
323 | inode->i_ino = ino; | 323 | inode->i_ino = ino; |
324 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 324 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
325 | 325 | ||
326 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { | 326 | if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { |
327 | err = nilfs_bmap_read(ii->i_bmap, NULL); | 327 | err = nilfs_bmap_read(ii->i_bmap, NULL); |
328 | if (err < 0) | 328 | if (err < 0) |
329 | goto failed_bmap; | 329 | goto failed_bmap; |
330 | 330 | ||
331 | set_bit(NILFS_I_BMAP, &ii->i_state); | 331 | set_bit(NILFS_I_BMAP, &ii->i_state); |
332 | /* No lock is needed; iget() ensures it. */ | 332 | /* No lock is needed; iget() ensures it. */ |
333 | } | 333 | } |
334 | 334 | ||
335 | ii->i_flags = nilfs_mask_flags( | 335 | ii->i_flags = nilfs_mask_flags( |
336 | mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); | 336 | mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED); |
337 | 337 | ||
338 | /* ii->i_file_acl = 0; */ | 338 | /* ii->i_file_acl = 0; */ |
339 | /* ii->i_dir_acl = 0; */ | 339 | /* ii->i_dir_acl = 0; */ |
340 | ii->i_dir_start_lookup = 0; | 340 | ii->i_dir_start_lookup = 0; |
341 | nilfs_set_inode_flags(inode); | 341 | nilfs_set_inode_flags(inode); |
342 | spin_lock(&nilfs->ns_next_gen_lock); | 342 | spin_lock(&nilfs->ns_next_gen_lock); |
343 | inode->i_generation = nilfs->ns_next_generation++; | 343 | inode->i_generation = nilfs->ns_next_generation++; |
344 | spin_unlock(&nilfs->ns_next_gen_lock); | 344 | spin_unlock(&nilfs->ns_next_gen_lock); |
345 | insert_inode_hash(inode); | 345 | insert_inode_hash(inode); |
346 | 346 | ||
347 | err = nilfs_init_acl(inode, dir); | 347 | err = nilfs_init_acl(inode, dir); |
348 | if (unlikely(err)) | 348 | if (unlikely(err)) |
349 | goto failed_acl; /* never occur. When supporting | 349 | goto failed_acl; /* never occur. When supporting |
350 | nilfs_init_acl(), proper cancellation of | 350 | nilfs_init_acl(), proper cancellation of |
351 | above jobs should be considered */ | 351 | above jobs should be considered */ |
352 | 352 | ||
353 | return inode; | 353 | return inode; |
354 | 354 | ||
355 | failed_acl: | 355 | failed_acl: |
356 | failed_bmap: | 356 | failed_bmap: |
357 | inode->i_nlink = 0; | 357 | inode->i_nlink = 0; |
358 | iput(inode); /* raw_inode will be deleted through | 358 | iput(inode); /* raw_inode will be deleted through |
359 | generic_delete_inode() */ | 359 | generic_delete_inode() */ |
360 | goto failed; | 360 | goto failed; |
361 | 361 | ||
362 | failed_ifile_create_inode: | 362 | failed_ifile_create_inode: |
363 | make_bad_inode(inode); | 363 | make_bad_inode(inode); |
364 | iput(inode); /* if i_nlink == 1, generic_forget_inode() will be | 364 | iput(inode); /* if i_nlink == 1, generic_forget_inode() will be |
365 | called */ | 365 | called */ |
366 | failed: | 366 | failed: |
367 | return ERR_PTR(err); | 367 | return ERR_PTR(err); |
368 | } | 368 | } |
369 | 369 | ||
370 | void nilfs_set_inode_flags(struct inode *inode) | 370 | void nilfs_set_inode_flags(struct inode *inode) |
371 | { | 371 | { |
372 | unsigned int flags = NILFS_I(inode)->i_flags; | 372 | unsigned int flags = NILFS_I(inode)->i_flags; |
373 | 373 | ||
374 | inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | | 374 | inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | |
375 | S_DIRSYNC); | 375 | S_DIRSYNC); |
376 | if (flags & FS_SYNC_FL) | 376 | if (flags & FS_SYNC_FL) |
377 | inode->i_flags |= S_SYNC; | 377 | inode->i_flags |= S_SYNC; |
378 | if (flags & FS_APPEND_FL) | 378 | if (flags & FS_APPEND_FL) |
379 | inode->i_flags |= S_APPEND; | 379 | inode->i_flags |= S_APPEND; |
380 | if (flags & FS_IMMUTABLE_FL) | 380 | if (flags & FS_IMMUTABLE_FL) |
381 | inode->i_flags |= S_IMMUTABLE; | 381 | inode->i_flags |= S_IMMUTABLE; |
382 | if (flags & FS_NOATIME_FL) | 382 | if (flags & FS_NOATIME_FL) |
383 | inode->i_flags |= S_NOATIME; | 383 | inode->i_flags |= S_NOATIME; |
384 | if (flags & FS_DIRSYNC_FL) | 384 | if (flags & FS_DIRSYNC_FL) |
385 | inode->i_flags |= S_DIRSYNC; | 385 | inode->i_flags |= S_DIRSYNC; |
386 | mapping_set_gfp_mask(inode->i_mapping, | 386 | mapping_set_gfp_mask(inode->i_mapping, |
387 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | 387 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); |
388 | } | 388 | } |
389 | 389 | ||
390 | int nilfs_read_inode_common(struct inode *inode, | 390 | int nilfs_read_inode_common(struct inode *inode, |
391 | struct nilfs_inode *raw_inode) | 391 | struct nilfs_inode *raw_inode) |
392 | { | 392 | { |
393 | struct nilfs_inode_info *ii = NILFS_I(inode); | 393 | struct nilfs_inode_info *ii = NILFS_I(inode); |
394 | int err; | 394 | int err; |
395 | 395 | ||
396 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 396 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
397 | inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); | 397 | inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); |
398 | inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); | 398 | inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); |
399 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 399 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
400 | inode->i_size = le64_to_cpu(raw_inode->i_size); | 400 | inode->i_size = le64_to_cpu(raw_inode->i_size); |
401 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 401 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
402 | inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); | 402 | inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); |
403 | inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 403 | inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
404 | inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | 404 | inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); |
405 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); | 405 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); |
406 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | 406 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); |
407 | if (inode->i_nlink == 0 && inode->i_mode == 0) | 407 | if (inode->i_nlink == 0 && inode->i_mode == 0) |
408 | return -EINVAL; /* this inode is deleted */ | 408 | return -EINVAL; /* this inode is deleted */ |
409 | 409 | ||
410 | inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); | 410 | inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); |
411 | ii->i_flags = le32_to_cpu(raw_inode->i_flags); | 411 | ii->i_flags = le32_to_cpu(raw_inode->i_flags); |
412 | #if 0 | 412 | #if 0 |
413 | ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); | 413 | ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); |
414 | ii->i_dir_acl = S_ISREG(inode->i_mode) ? | 414 | ii->i_dir_acl = S_ISREG(inode->i_mode) ? |
415 | 0 : le32_to_cpu(raw_inode->i_dir_acl); | 415 | 0 : le32_to_cpu(raw_inode->i_dir_acl); |
416 | #endif | 416 | #endif |
417 | ii->i_dir_start_lookup = 0; | 417 | ii->i_dir_start_lookup = 0; |
418 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 418 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
419 | 419 | ||
420 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 420 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
421 | S_ISLNK(inode->i_mode)) { | 421 | S_ISLNK(inode->i_mode)) { |
422 | err = nilfs_bmap_read(ii->i_bmap, raw_inode); | 422 | err = nilfs_bmap_read(ii->i_bmap, raw_inode); |
423 | if (err < 0) | 423 | if (err < 0) |
424 | return err; | 424 | return err; |
425 | set_bit(NILFS_I_BMAP, &ii->i_state); | 425 | set_bit(NILFS_I_BMAP, &ii->i_state); |
426 | /* No lock is needed; iget() ensures it. */ | 426 | /* No lock is needed; iget() ensures it. */ |
427 | } | 427 | } |
428 | return 0; | 428 | return 0; |
429 | } | 429 | } |
430 | 430 | ||
/*
 * __nilfs_read_inode - read an on-disk inode and set up the in-core inode
 * @sb:    super block instance
 * @root:  nilfs_root whose ifile holds the raw inode
 * @ino:   inode number
 * @inode: freshly allocated (I_NEW) in-core inode to fill in
 *
 * Reads the raw inode entry from the ifile, copies the common fields,
 * and installs the inode/file/address-space operation vectors according
 * to the file type.  Returns 0 on success or a negative error code.
 */
static int __nilfs_read_inode(struct super_block *sb,
			      struct nilfs_root *root, unsigned long ino,
			      struct inode *inode)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *bh;
	struct nilfs_inode *raw_inode;
	int err;

	/* Hold the DAT semaphore (read side) while mapping the raw inode */
	down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh);
	if (unlikely(err))
		goto bad_inode;

	raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh);

	err = nilfs_read_inode_common(inode, raw_inode);
	if (err)
		goto failed_unmap;

	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &nilfs_file_inode_operations;
		inode->i_fop = &nilfs_file_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &nilfs_dir_inode_operations;
		inode->i_fop = &nilfs_dir_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &nilfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &nilfs_aops;
	} else {
		/* device / fifo / socket: decode the stored device number */
		inode->i_op = &nilfs_special_inode_operations;
		init_special_inode(
			inode, inode->i_mode,
			huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
	}
	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
	brelse(bh);
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	nilfs_set_inode_flags(inode);
	return 0;

 failed_unmap:
	nilfs_ifile_unmap_inode(root->ifile, ino, bh);
	brelse(bh);

 bad_inode:
	up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
	return err;
}
482 | 482 | ||
483 | static int nilfs_iget_test(struct inode *inode, void *opaque) | 483 | static int nilfs_iget_test(struct inode *inode, void *opaque) |
484 | { | 484 | { |
485 | struct nilfs_iget_args *args = opaque; | 485 | struct nilfs_iget_args *args = opaque; |
486 | struct nilfs_inode_info *ii; | 486 | struct nilfs_inode_info *ii; |
487 | 487 | ||
488 | if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) | 488 | if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) |
489 | return 0; | 489 | return 0; |
490 | 490 | ||
491 | ii = NILFS_I(inode); | 491 | ii = NILFS_I(inode); |
492 | if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) | 492 | if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) |
493 | return !args->for_gc; | 493 | return !args->for_gc; |
494 | 494 | ||
495 | return args->for_gc && args->cno == ii->i_cno; | 495 | return args->for_gc && args->cno == ii->i_cno; |
496 | } | 496 | } |
497 | 497 | ||
498 | static int nilfs_iget_set(struct inode *inode, void *opaque) | 498 | static int nilfs_iget_set(struct inode *inode, void *opaque) |
499 | { | 499 | { |
500 | struct nilfs_iget_args *args = opaque; | 500 | struct nilfs_iget_args *args = opaque; |
501 | 501 | ||
502 | inode->i_ino = args->ino; | 502 | inode->i_ino = args->ino; |
503 | if (args->for_gc) { | 503 | if (args->for_gc) { |
504 | NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; | 504 | NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; |
505 | NILFS_I(inode)->i_cno = args->cno; | 505 | NILFS_I(inode)->i_cno = args->cno; |
506 | NILFS_I(inode)->i_root = NULL; | 506 | NILFS_I(inode)->i_root = NULL; |
507 | } else { | 507 | } else { |
508 | if (args->root && args->ino == NILFS_ROOT_INO) | 508 | if (args->root && args->ino == NILFS_ROOT_INO) |
509 | nilfs_get_root(args->root); | 509 | nilfs_get_root(args->root); |
510 | NILFS_I(inode)->i_root = args->root; | 510 | NILFS_I(inode)->i_root = args->root; |
511 | } | 511 | } |
512 | return 0; | 512 | return 0; |
513 | } | 513 | } |
514 | 514 | ||
515 | struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, | 515 | struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, |
516 | unsigned long ino) | 516 | unsigned long ino) |
517 | { | 517 | { |
518 | struct nilfs_iget_args args = { | 518 | struct nilfs_iget_args args = { |
519 | .ino = ino, .root = root, .cno = 0, .for_gc = 0 | 519 | .ino = ino, .root = root, .cno = 0, .for_gc = 0 |
520 | }; | 520 | }; |
521 | 521 | ||
522 | return ilookup5(sb, ino, nilfs_iget_test, &args); | 522 | return ilookup5(sb, ino, nilfs_iget_test, &args); |
523 | } | 523 | } |
524 | 524 | ||
525 | struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, | 525 | struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, |
526 | unsigned long ino) | 526 | unsigned long ino) |
527 | { | 527 | { |
528 | struct nilfs_iget_args args = { | 528 | struct nilfs_iget_args args = { |
529 | .ino = ino, .root = root, .cno = 0, .for_gc = 0 | 529 | .ino = ino, .root = root, .cno = 0, .for_gc = 0 |
530 | }; | 530 | }; |
531 | 531 | ||
532 | return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); | 532 | return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); |
533 | } | 533 | } |
534 | 534 | ||
535 | struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, | 535 | struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, |
536 | unsigned long ino) | 536 | unsigned long ino) |
537 | { | 537 | { |
538 | struct inode *inode; | 538 | struct inode *inode; |
539 | int err; | 539 | int err; |
540 | 540 | ||
541 | inode = nilfs_iget_locked(sb, root, ino); | 541 | inode = nilfs_iget_locked(sb, root, ino); |
542 | if (unlikely(!inode)) | 542 | if (unlikely(!inode)) |
543 | return ERR_PTR(-ENOMEM); | 543 | return ERR_PTR(-ENOMEM); |
544 | if (!(inode->i_state & I_NEW)) | 544 | if (!(inode->i_state & I_NEW)) |
545 | return inode; | 545 | return inode; |
546 | 546 | ||
547 | err = __nilfs_read_inode(sb, root, ino, inode); | 547 | err = __nilfs_read_inode(sb, root, ino, inode); |
548 | if (unlikely(err)) { | 548 | if (unlikely(err)) { |
549 | iget_failed(inode); | 549 | iget_failed(inode); |
550 | return ERR_PTR(err); | 550 | return ERR_PTR(err); |
551 | } | 551 | } |
552 | unlock_new_inode(inode); | 552 | unlock_new_inode(inode); |
553 | return inode; | 553 | return inode; |
554 | } | 554 | } |
555 | 555 | ||
556 | struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, | 556 | struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, |
557 | __u64 cno) | 557 | __u64 cno) |
558 | { | 558 | { |
559 | struct nilfs_iget_args args = { | 559 | struct nilfs_iget_args args = { |
560 | .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 | 560 | .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 |
561 | }; | 561 | }; |
562 | struct inode *inode; | 562 | struct inode *inode; |
563 | int err; | 563 | int err; |
564 | 564 | ||
565 | inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); | 565 | inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); |
566 | if (unlikely(!inode)) | 566 | if (unlikely(!inode)) |
567 | return ERR_PTR(-ENOMEM); | 567 | return ERR_PTR(-ENOMEM); |
568 | if (!(inode->i_state & I_NEW)) | 568 | if (!(inode->i_state & I_NEW)) |
569 | return inode; | 569 | return inode; |
570 | 570 | ||
571 | err = nilfs_init_gcinode(inode); | 571 | err = nilfs_init_gcinode(inode); |
572 | if (unlikely(err)) { | 572 | if (unlikely(err)) { |
573 | iget_failed(inode); | 573 | iget_failed(inode); |
574 | return ERR_PTR(err); | 574 | return ERR_PTR(err); |
575 | } | 575 | } |
576 | unlock_new_inode(inode); | 576 | unlock_new_inode(inode); |
577 | return inode; | 577 | return inode; |
578 | } | 578 | } |
579 | 579 | ||
/*
 * nilfs_write_inode_common - copy in-core inode state into a raw inode
 * @inode:     in-core inode to serialize
 * @raw_inode: on-disk (little-endian) inode entry to fill
 * @has_bmap:  nonzero to also serialize the bmap into @raw_inode
 *
 * All multi-byte fields are written in little-endian byte order.
 */
void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode, int has_bmap)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(inode->i_uid);
	raw_inode->i_gid = cpu_to_le32(inode->i_gid);
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
	raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
	raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
		struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

		/* zero-fill unused portion in the case of super root block */
		raw_inode->i_xattr = 0;
		raw_inode->i_pad = 0;
		memset((void *)raw_inode + sizeof(*raw_inode), 0,
		       nilfs->ns_inode_size - sizeof(*raw_inode));
	}

	if (has_bmap)
		nilfs_bmap_write(ii->i_bmap, raw_inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		/* bmap area doubles as device-number storage for specials */
		raw_inode->i_device_code =
			cpu_to_le64(huge_encode_dev(inode->i_rdev));
	/* When extending inode, nilfs->ns_inode_size should be checked
	   for substitutions of appended fields */
}
617 | 617 | ||
618 | void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) | 618 | void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) |
619 | { | 619 | { |
620 | ino_t ino = inode->i_ino; | 620 | ino_t ino = inode->i_ino; |
621 | struct nilfs_inode_info *ii = NILFS_I(inode); | 621 | struct nilfs_inode_info *ii = NILFS_I(inode); |
622 | struct inode *ifile = ii->i_root->ifile; | 622 | struct inode *ifile = ii->i_root->ifile; |
623 | struct nilfs_inode *raw_inode; | 623 | struct nilfs_inode *raw_inode; |
624 | 624 | ||
625 | raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); | 625 | raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); |
626 | 626 | ||
627 | if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) | 627 | if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) |
628 | memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); | 628 | memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); |
629 | set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); | 629 | set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); |
630 | 630 | ||
631 | nilfs_write_inode_common(inode, raw_inode, 0); | 631 | nilfs_write_inode_common(inode, raw_inode, 0); |
632 | /* XXX: call with has_bmap = 0 is a workaround to avoid | 632 | /* XXX: call with has_bmap = 0 is a workaround to avoid |
633 | deadlock of bmap. This delays update of i_bmap to just | 633 | deadlock of bmap. This delays update of i_bmap to just |
634 | before writing */ | 634 | before writing */ |
635 | nilfs_ifile_unmap_inode(ifile, ino, ibh); | 635 | nilfs_ifile_unmap_inode(ifile, ino, ibh); |
636 | } | 636 | } |
637 | 637 | ||
638 | #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ | 638 | #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ |
639 | 639 | ||
/*
 * nilfs_truncate_bmap - truncate the bmap of @ii down to block @from
 * @ii:   inode info whose bmap is truncated
 * @from: first block offset to remove (everything >= @from goes away)
 *
 * Truncates in chunks of at most NILFS_MAX_TRUNCATE_BLOCKS per pass,
 * relaxing memory pressure between passes, so a huge truncate does not
 * pin the segment-construction lock for too long.  Errors are logged,
 * not returned (truncate has no error path to the caller).
 */
static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	unsigned long b;
	int ret;

	/* nothing to do if this inode never had a bmap */
	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
 repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;		/* bmap is already empty */
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;		/* nothing beyond the truncation point */

	/* peel off at most NILFS_MAX_TRUNCATE_BLOCKS blocks per pass */
	b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	/* on -ENOMEM, retry once after the pressure relief above */
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

 failed:
	nilfs_warning(ii->vfs_inode.i_sb, __func__,
		      "failed to truncate bmap (ino=%lu, err=%d)",
		      ii->vfs_inode.i_ino, ret);
}
670 | 670 | ||
/*
 * nilfs_truncate - truncate file data down to the current i_size
 * @inode: inode being truncated (i_size already set by the caller)
 *
 * Runs inside a nilfs transaction: zeroes the partial tail page,
 * truncates the bmap, updates timestamps, and marks the inode dirty.
 */
void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	/* first block entirely past the new size */
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	/* zero the tail of the last remaining block */
	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_mark_inode_dirty(inode);
	nilfs_set_file_dirty(inode, 0);
	nilfs_transaction_commit(sb);
	/* May construct a logical segment and may fail in sync mode.
	   But truncate has no return value. */
}
702 | 702 | ||
/*
 * nilfs_clear_inode - release per-inode resources at eviction time
 * @inode: inode being torn down
 *
 * Frees everything set up when the inode was read in: the cached ifile
 * buffer, the palloc cache (for MDT inodes), the bmap, the btnode page
 * cache, and the root reference held by the root inode.
 */
static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

	/*
	 * Free resources allocated in nilfs_read_inode(), here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (mdi && mdi->mi_palloc_cache)
		nilfs_palloc_destroy_cache(inode);

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	nilfs_btnode_cache_clear(&ii->i_btnode_cache);

	/* drop the root reference taken in nilfs_iget_set() */
	if (ii->i_root && inode->i_ino == NILFS_ROOT_INO)
		nilfs_put_root(ii->i_root);
}
726 | 726 | ||
/*
 * nilfs_evict_inode - ->evict_inode handler
 * @inode: inode being evicted from the inode cache
 *
 * For still-linked, rootless, or bad inodes, only the page cache and
 * in-core state are torn down.  For an unlinked inode, the on-disk
 * inode is additionally deleted from the ifile inside a transaction.
 */
void nilfs_evict_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int ret;

	if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
		/* no on-disk deletion needed; just tear down in-core state */
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		end_writeback(inode);
		nilfs_clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);

	/* TODO: some of the following operations may fail. */
	nilfs_truncate_bmap(ii, 0);
	nilfs_mark_inode_dirty(inode);
	end_writeback(inode);

	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
	if (!ret)
		atomic_dec(&ii->i_root->inodes_count);

	nilfs_clear_inode(inode);

	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/* May construct a logical segment and may fail in sync mode.
	   But delete_inode has no return value. */
}
763 | 763 | ||
/*
 * nilfs_setattr - ->setattr handler
 * @dentry: dentry of the target inode
 * @iattr:  attributes to apply
 *
 * Applies attribute changes inside a nilfs transaction.  For a size
 * change, pending direct I/O is drained (inode_dio_wait) before the
 * truncate, per the convention that filesystems wait for DIO in their
 * own ->setattr.  Returns 0 or a negative error code; on error the
 * transaction is aborted.
 */
int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct nilfs_transaction_info ti;
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	int err;

	err = inode_change_ok(inode, iattr);
	if (err)
		return err;

	err = nilfs_transaction_begin(sb, &ti, 0);
	if (unlikely(err))
		return err;

	if ((iattr->ia_valid & ATTR_SIZE) &&
	    iattr->ia_size != i_size_read(inode)) {
		/* drain outstanding direct I/O before changing i_size */
		inode_dio_wait(inode);

		err = vmtruncate(inode, iattr->ia_size);
		if (unlikely(err))
			goto out_err;
	}

	setattr_copy(inode, iattr);
	mark_inode_dirty(inode);

	if (iattr->ia_valid & ATTR_MODE) {
		err = nilfs_acl_chmod(inode);
		if (unlikely(err))
			goto out_err;
	}

	return nilfs_transaction_commit(sb);

out_err:
	nilfs_transaction_abort(sb);
	return err;
}
801 | 803 | ||
802 | int nilfs_permission(struct inode *inode, int mask) | 804 | int nilfs_permission(struct inode *inode, int mask) |
803 | { | 805 | { |
804 | struct nilfs_root *root = NILFS_I(inode)->i_root; | 806 | struct nilfs_root *root = NILFS_I(inode)->i_root; |
805 | if ((mask & MAY_WRITE) && root && | 807 | if ((mask & MAY_WRITE) && root && |
806 | root->cno != NILFS_CPTREE_CURRENT_CNO) | 808 | root->cno != NILFS_CPTREE_CURRENT_CNO) |
807 | return -EROFS; /* snapshot is not writable */ | 809 | return -EROFS; /* snapshot is not writable */ |
808 | 810 | ||
809 | return generic_permission(inode, mask); | 811 | return generic_permission(inode, mask); |
810 | } | 812 | } |
811 | 813 | ||
/*
 * nilfs_load_inode_block - get the ifile buffer holding @inode's raw entry
 * @inode: target inode
 * @pbh:   output; buffer head of the inode block, with an extra reference
 *
 * Caches the buffer in ii->i_bh on first use.  Uses a check/drop-lock/
 * recheck pattern: the block lookup sleeps, so the spinlock is released
 * around it and i_bh is re-tested in case another task filled it first.
 * Returns 0 on success or a negative error code.
 */
int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&nilfs->ns_inode_lock);
	if (ii->i_bh == NULL) {
		spin_unlock(&nilfs->ns_inode_lock);
		err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
						  inode->i_ino, pbh);
		if (unlikely(err))
			return err;
		spin_lock(&nilfs->ns_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else {
			/* lost the race; use the winner's buffer */
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}
839 | 841 | ||
840 | int nilfs_inode_dirty(struct inode *inode) | 842 | int nilfs_inode_dirty(struct inode *inode) |
841 | { | 843 | { |
842 | struct nilfs_inode_info *ii = NILFS_I(inode); | 844 | struct nilfs_inode_info *ii = NILFS_I(inode); |
843 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; | 845 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; |
844 | int ret = 0; | 846 | int ret = 0; |
845 | 847 | ||
846 | if (!list_empty(&ii->i_dirty)) { | 848 | if (!list_empty(&ii->i_dirty)) { |
847 | spin_lock(&nilfs->ns_inode_lock); | 849 | spin_lock(&nilfs->ns_inode_lock); |
848 | ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || | 850 | ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || |
849 | test_bit(NILFS_I_BUSY, &ii->i_state); | 851 | test_bit(NILFS_I_BUSY, &ii->i_state); |
850 | spin_unlock(&nilfs->ns_inode_lock); | 852 | spin_unlock(&nilfs->ns_inode_lock); |
851 | } | 853 | } |
852 | return ret; | 854 | return ret; |
853 | } | 855 | } |
854 | 856 | ||
/*
 * nilfs_set_file_dirty - account dirty blocks and queue the inode
 * @inode:    inode to mark dirty
 * @nr_dirty: number of newly dirtied blocks to add to the global count
 *
 * Sets NILFS_I_DIRTY and, on the first transition, moves the inode onto
 * the filesystem-wide dirty-files list (taking an igrab reference).
 * Returns 0 on success, -EINVAL if the inode is being freed.
 */
int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;

	atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);

	/* already marked dirty: nothing more to queue */
	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/* Because this routine may race with nilfs_dispose_list(),
		   we have to check NILFS_I_QUEUED here, too. */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/* This will happen when somebody is freeing
			   this inode. */
			nilfs_warning(inode->i_sb, __func__,
				      "cannot get inode (ino=%lu)\n",
				      inode->i_ino);
			spin_unlock(&nilfs->ns_inode_lock);
			return -EINVAL; /* NILFS_I_DIRTY may remain for
					   freeing inode */
		}
		list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&nilfs->ns_inode_lock);
	return 0;
}
886 | 888 | ||
887 | int nilfs_mark_inode_dirty(struct inode *inode) | 889 | int nilfs_mark_inode_dirty(struct inode *inode) |
888 | { | 890 | { |
889 | struct buffer_head *ibh; | 891 | struct buffer_head *ibh; |
890 | int err; | 892 | int err; |
891 | 893 | ||
892 | err = nilfs_load_inode_block(inode, &ibh); | 894 | err = nilfs_load_inode_block(inode, &ibh); |
893 | if (unlikely(err)) { | 895 | if (unlikely(err)) { |
894 | nilfs_warning(inode->i_sb, __func__, | 896 | nilfs_warning(inode->i_sb, __func__, |
895 | "failed to reget inode block.\n"); | 897 | "failed to reget inode block.\n"); |
896 | return err; | 898 | return err; |
897 | } | 899 | } |
898 | nilfs_update_inode(inode, ibh); | 900 | nilfs_update_inode(inode, ibh); |
899 | mark_buffer_dirty(ibh); | 901 | mark_buffer_dirty(ibh); |
900 | nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); | 902 | nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); |
901 | brelse(ibh); | 903 | brelse(ibh); |
902 | return 0; | 904 | return 0; |
903 | } | 905 | } |
904 | 906 | ||
905 | /** | 907 | /** |
906 | * nilfs_dirty_inode - reflect changes on given inode to an inode block. | 908 | * nilfs_dirty_inode - reflect changes on given inode to an inode block. |
907 | * @inode: inode of the file to be registered. | 909 | * @inode: inode of the file to be registered. |
908 | * | 910 | * |
909 | * nilfs_dirty_inode() loads a inode block containing the specified | 911 | * nilfs_dirty_inode() loads a inode block containing the specified |
910 | * @inode and copies data from a nilfs_inode to a corresponding inode | 912 | * @inode and copies data from a nilfs_inode to a corresponding inode |
911 | * entry in the inode block. This operation is excluded from the segment | 913 | * entry in the inode block. This operation is excluded from the segment |
912 | * construction. This function can be called both as a single operation | 914 | * construction. This function can be called both as a single operation |
913 | * and as a part of indivisible file operations. | 915 | * and as a part of indivisible file operations. |
914 | */ | 916 | */ |
915 | void nilfs_dirty_inode(struct inode *inode, int flags) | 917 | void nilfs_dirty_inode(struct inode *inode, int flags) |
916 | { | 918 | { |
917 | struct nilfs_transaction_info ti; | 919 | struct nilfs_transaction_info ti; |
918 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | 920 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); |
919 | 921 | ||
920 | if (is_bad_inode(inode)) { | 922 | if (is_bad_inode(inode)) { |
921 | nilfs_warning(inode->i_sb, __func__, | 923 | nilfs_warning(inode->i_sb, __func__, |
922 | "tried to mark bad_inode dirty. ignored.\n"); | 924 | "tried to mark bad_inode dirty. ignored.\n"); |
923 | dump_stack(); | 925 | dump_stack(); |
924 | return; | 926 | return; |
925 | } | 927 | } |
926 | if (mdi) { | 928 | if (mdi) { |
927 | nilfs_mdt_mark_dirty(inode); | 929 | nilfs_mdt_mark_dirty(inode); |
928 | return; | 930 | return; |
929 | } | 931 | } |
930 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | 932 | nilfs_transaction_begin(inode->i_sb, &ti, 0); |
931 | nilfs_mark_inode_dirty(inode); | 933 | nilfs_mark_inode_dirty(inode); |
932 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | 934 | nilfs_transaction_commit(inode->i_sb); /* never fails */ |
933 | } | 935 | } |
934 | 936 | ||
/*
 * nilfs_fiemap - report the extent mapping of a file range to user space
 * @inode:   inode being queried
 * @fieinfo: fiemap request/response state from the VFS
 * @start:   byte offset of the first byte of interest
 * @len:     length in bytes of the range of interest
 *
 * Walks the range block by block, merging contiguous mapped blocks into
 * extents, and reports both committed extents (via the bmap) and delayed
 * allocation extents (pages dirtied but not yet written by the log writer).
 * Returns 0 on success or a negative error code.
 */
int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 __u64 start, __u64 len)
{
	struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
	/* Accumulator for the extent currently being built; size == 0
	 * means "no extent in progress". */
	__u64 logical = 0, phys = 0, size = 0;
	__u32 flags = 0;
	loff_t isize;
	sector_t blkoff, end_blkoff;
	sector_t delalloc_blkoff;
	unsigned long delalloc_blklen;
	unsigned int blkbits = inode->i_blkbits;
	int ret, n;

	/* Only FIEMAP_FLAG_SYNC is supported; reject any other flag. */
	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);

	blkoff = start >> blkbits;
	end_blkoff = (start + len - 1) >> blkbits;

	/* Locate the first delayed-allocation run at or after blkoff. */
	delalloc_blklen = nilfs_find_uncommitted_extent(inode, blkoff,
							&delalloc_blkoff);

	do {
		__u64 blkphy;
		unsigned int maxblocks;

		if (delalloc_blklen && blkoff == delalloc_blkoff) {
			if (size) {
				/* End of the current extent */
				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
			}
			if (blkoff > end_blkoff)
				break;

			/* Emit the delalloc run as an unmapped extent.
			 * NOTE(review): these shifts are done in the operand's
			 * native width (sector_t / unsigned long); on 32-bit
			 * configs large offsets could truncate before the
			 * assignment to __u64 — confirm against supported
			 * configurations. */
			flags = FIEMAP_EXTENT_MERGED | FIEMAP_EXTENT_DELALLOC;
			logical = blkoff << blkbits;
			phys = 0;
			size = delalloc_blklen << blkbits;

			/* Skip past this run and find the next one. */
			blkoff = delalloc_blkoff + delalloc_blklen;
			delalloc_blklen = nilfs_find_uncommitted_extent(
				inode, blkoff, &delalloc_blkoff);
			continue;
		}

		/*
		 * Limit the number of blocks that we look up so as
		 * not to get into the next delayed allocation extent.
		 */
		maxblocks = INT_MAX;
		if (delalloc_blklen)
			maxblocks = min_t(sector_t, delalloc_blkoff - blkoff,
					  maxblocks);
		blkphy = 0;

		/* Look up a contiguous run of committed blocks under the
		 * DAT semaphore; n is the run length or a negative errno. */
		down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
		n = nilfs_bmap_lookup_contig(
			NILFS_I(inode)->i_bmap, blkoff, &blkphy, maxblocks);
		up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);

		if (n < 0) {
			int past_eof;

			if (unlikely(n != -ENOENT))
				break; /* error */

			/* HOLE */
			blkoff++;
			past_eof = ((blkoff << blkbits) >= isize);

			if (size) {
				/* End of the current extent */

				if (past_eof)
					flags |= FIEMAP_EXTENT_LAST;

				ret = fiemap_fill_next_extent(
					fieinfo, logical, phys, size, flags);
				if (ret)
					break;
				size = 0;
			}
			if (blkoff > end_blkoff || past_eof)
				break;
		} else {
			if (size) {
				if (phys && blkphy << blkbits == phys + size) {
					/* The current extent goes on */
					size += n << blkbits;
				} else {
					/* Terminate the current extent */
					ret = fiemap_fill_next_extent(
						fieinfo, logical, phys, size,
						flags);
					if (ret || blkoff > end_blkoff)
						break;

					/* Start another extent */
					flags = FIEMAP_EXTENT_MERGED;
					logical = blkoff << blkbits;
					phys = blkphy << blkbits;
					size = n << blkbits;
				}
			} else {
				/* Start a new extent */
				flags = FIEMAP_EXTENT_MERGED;
				logical = blkoff << blkbits;
				phys = blkphy << blkbits;
				size = n << blkbits;
			}
			blkoff += n;
		}
		/* Long mappings: yield the CPU between iterations. */
		cond_resched();
	} while (true);

	/* If ret is 1 then we just hit the end of the extent array */
	if (ret == 1)
		ret = 0;

	mutex_unlock(&inode->i_mutex);
	return ret;
}
1065 | 1067 |
fs/ocfs2/file.c
1 | /* -*- mode: c; c-basic-offset: 8; -*- | 1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | 2 | * vim: noexpandtab sw=8 ts=8 sts=0: |
3 | * | 3 | * |
4 | * file.c | 4 | * file.c |
5 | * | 5 | * |
6 | * File open, close, extend, truncate | 6 | * File open, close, extend, truncate |
7 | * | 7 | * |
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
12 | * License as published by the Free Software Foundation; either | 12 | * License as published by the Free Software Foundation; either |
13 | * version 2 of the License, or (at your option) any later version. | 13 | * version 2 of the License, or (at your option) any later version. |
14 | * | 14 | * |
15 | * This program is distributed in the hope that it will be useful, | 15 | * This program is distributed in the hope that it will be useful, |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | * General Public License for more details. | 18 | * General Public License for more details. |
19 | * | 19 | * |
20 | * You should have received a copy of the GNU General Public | 20 | * You should have received a copy of the GNU General Public |
21 | * License along with this program; if not, write to the | 21 | * License along with this program; if not, write to the |
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
23 | * Boston, MA 021110-1307, USA. | 23 | * Boston, MA 021110-1307, USA. |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/capability.h> | 26 | #include <linux/capability.h> |
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
31 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
32 | #include <linux/uio.h> | 32 | #include <linux/uio.h> |
33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
34 | #include <linux/splice.h> | 34 | #include <linux/splice.h> |
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
37 | #include <linux/falloc.h> | 37 | #include <linux/falloc.h> |
38 | #include <linux/quotaops.h> | 38 | #include <linux/quotaops.h> |
39 | #include <linux/blkdev.h> | 39 | #include <linux/blkdev.h> |
40 | 40 | ||
41 | #include <cluster/masklog.h> | 41 | #include <cluster/masklog.h> |
42 | 42 | ||
43 | #include "ocfs2.h" | 43 | #include "ocfs2.h" |
44 | 44 | ||
45 | #include "alloc.h" | 45 | #include "alloc.h" |
46 | #include "aops.h" | 46 | #include "aops.h" |
47 | #include "dir.h" | 47 | #include "dir.h" |
48 | #include "dlmglue.h" | 48 | #include "dlmglue.h" |
49 | #include "extent_map.h" | 49 | #include "extent_map.h" |
50 | #include "file.h" | 50 | #include "file.h" |
51 | #include "sysfile.h" | 51 | #include "sysfile.h" |
52 | #include "inode.h" | 52 | #include "inode.h" |
53 | #include "ioctl.h" | 53 | #include "ioctl.h" |
54 | #include "journal.h" | 54 | #include "journal.h" |
55 | #include "locks.h" | 55 | #include "locks.h" |
56 | #include "mmap.h" | 56 | #include "mmap.h" |
57 | #include "suballoc.h" | 57 | #include "suballoc.h" |
58 | #include "super.h" | 58 | #include "super.h" |
59 | #include "xattr.h" | 59 | #include "xattr.h" |
60 | #include "acl.h" | 60 | #include "acl.h" |
61 | #include "quota.h" | 61 | #include "quota.h" |
62 | #include "refcounttree.h" | 62 | #include "refcounttree.h" |
63 | #include "ocfs2_trace.h" | 63 | #include "ocfs2_trace.h" |
64 | 64 | ||
65 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
66 | 66 | ||
67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) |
68 | { | 68 | { |
69 | struct ocfs2_file_private *fp; | 69 | struct ocfs2_file_private *fp; |
70 | 70 | ||
71 | fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL); | 71 | fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL); |
72 | if (!fp) | 72 | if (!fp) |
73 | return -ENOMEM; | 73 | return -ENOMEM; |
74 | 74 | ||
75 | fp->fp_file = file; | 75 | fp->fp_file = file; |
76 | mutex_init(&fp->fp_mutex); | 76 | mutex_init(&fp->fp_mutex); |
77 | ocfs2_file_lock_res_init(&fp->fp_flock, fp); | 77 | ocfs2_file_lock_res_init(&fp->fp_flock, fp); |
78 | file->private_data = fp; | 78 | file->private_data = fp; |
79 | 79 | ||
80 | return 0; | 80 | return 0; |
81 | } | 81 | } |
82 | 82 | ||
83 | static void ocfs2_free_file_private(struct inode *inode, struct file *file) | 83 | static void ocfs2_free_file_private(struct inode *inode, struct file *file) |
84 | { | 84 | { |
85 | struct ocfs2_file_private *fp = file->private_data; | 85 | struct ocfs2_file_private *fp = file->private_data; |
86 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 86 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
87 | 87 | ||
88 | if (fp) { | 88 | if (fp) { |
89 | ocfs2_simple_drop_lockres(osb, &fp->fp_flock); | 89 | ocfs2_simple_drop_lockres(osb, &fp->fp_flock); |
90 | ocfs2_lock_res_free(&fp->fp_flock); | 90 | ocfs2_lock_res_free(&fp->fp_flock); |
91 | kfree(fp); | 91 | kfree(fp); |
92 | file->private_data = NULL; | 92 | file->private_data = NULL; |
93 | } | 93 | } |
94 | } | 94 | } |
95 | 95 | ||
/*
 * Open a regular file: bump the inode's open count under ip_lock (guarding
 * against concurrent deletion by another cluster node), record O_DIRECT
 * opens, and set up the per-open private locking state.
 */
static int ocfs2_file_open(struct inode *inode, struct file *file)
{
	int status;
	int mode = file->f_flags;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	trace_ocfs2_file_open(inode, file, file->f_path.dentry,
			      (unsigned long long)OCFS2_I(inode)->ip_blkno,
			      file->f_path.dentry->d_name.len,
			      file->f_path.dentry->d_name.name, mode);

	/* Writable opens may incur quota changes later; set up quotas now. */
	if (file->f_mode & FMODE_WRITE)
		dquot_initialize(inode);

	spin_lock(&oi->ip_lock);

	/* Check that the inode hasn't been wiped from disk by another
	 * node. If it hasn't then we're safe as long as we hold the
	 * spin lock until our increment of open count. */
	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
		spin_unlock(&oi->ip_lock);

		status = -ENOENT;
		goto leave;
	}

	/* Remember that at least one opener used O_DIRECT. */
	if (mode & O_DIRECT)
		oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT;

	oi->ip_open_count++;
	spin_unlock(&oi->ip_lock);

	status = ocfs2_init_file_private(inode, file);
	if (status) {
		/*
		 * We want to set open count back if we're failing the
		 * open.
		 */
		spin_lock(&oi->ip_lock);
		oi->ip_open_count--;
		spin_unlock(&oi->ip_lock);
	}

leave:
	return status;
}
142 | 142 | ||
/*
 * Release a regular file: drop the open count under ip_lock, clearing the
 * O_DIRECT hint once the last opener is gone, then free the per-open
 * private locking state.
 */
static int ocfs2_file_release(struct inode *inode, struct file *file)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	spin_lock(&oi->ip_lock);
	if (!--oi->ip_open_count)
		oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;

	/* Trace while still holding ip_lock so the reported count is
	 * consistent with the decrement above. */
	trace_ocfs2_file_release(inode, file, file->f_path.dentry,
				 oi->ip_blkno,
				 file->f_path.dentry->d_name.len,
				 file->f_path.dentry->d_name.name,
				 oi->ip_open_count);
	spin_unlock(&oi->ip_lock);

	ocfs2_free_file_private(inode, file);

	return 0;
}
162 | 162 | ||
/* Directories get the same per-open private state as regular files. */
static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
	return ocfs2_init_file_private(inode, file);
}
167 | 167 | ||
/* Free the per-open private state attached in ocfs2_dir_open(). */
static int ocfs2_dir_release(struct inode *inode, struct file *file)
{
	ocfs2_free_file_private(inode, file);
	return 0;
}
173 | 173 | ||
/*
 * fsync/fdatasync: force the journal to commit so the file's metadata and
 * data reach stable storage. For a datasync on an inode with no dirty
 * datasync state, only a block-device cache flush is needed (and only if
 * barriers are enabled). Returns 0 on success, -EIO on failure.
 */
static int ocfs2_sync_file(struct file *file, int datasync)
{
	int err = 0;
	journal_t *journal;
	struct inode *inode = file->f_mapping->host;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
			      OCFS2_I(inode)->ip_blkno,
			      file->f_path.dentry->d_name.len,
			      file->f_path.dentry->d_name.name,
			      (unsigned long long)datasync);

	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
		/*
		 * We still have to flush drive's caches to get data to the
		 * platter
		 */
		if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
		goto bail;
	}

	/* Full sync: force a jbd2 journal commit and wait for it. */
	journal = osb->journal->j_journal;
	err = jbd2_journal_force_commit(journal);

bail:
	if (err)
		mlog_errno(err);

	/* Callers only distinguish success from failure. */
	return (err < 0) ? -EIO : 0;
}
206 | 206 | ||
207 | int ocfs2_should_update_atime(struct inode *inode, | 207 | int ocfs2_should_update_atime(struct inode *inode, |
208 | struct vfsmount *vfsmnt) | 208 | struct vfsmount *vfsmnt) |
209 | { | 209 | { |
210 | struct timespec now; | 210 | struct timespec now; |
211 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 211 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
212 | 212 | ||
213 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | 213 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) |
214 | return 0; | 214 | return 0; |
215 | 215 | ||
216 | if ((inode->i_flags & S_NOATIME) || | 216 | if ((inode->i_flags & S_NOATIME) || |
217 | ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) | 217 | ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) |
218 | return 0; | 218 | return 0; |
219 | 219 | ||
220 | /* | 220 | /* |
221 | * We can be called with no vfsmnt structure - NFSD will | 221 | * We can be called with no vfsmnt structure - NFSD will |
222 | * sometimes do this. | 222 | * sometimes do this. |
223 | * | 223 | * |
224 | * Note that our action here is different than touch_atime() - | 224 | * Note that our action here is different than touch_atime() - |
225 | * if we can't tell whether this is a noatime mount, then we | 225 | * if we can't tell whether this is a noatime mount, then we |
226 | * don't know whether to trust the value of s_atime_quantum. | 226 | * don't know whether to trust the value of s_atime_quantum. |
227 | */ | 227 | */ |
228 | if (vfsmnt == NULL) | 228 | if (vfsmnt == NULL) |
229 | return 0; | 229 | return 0; |
230 | 230 | ||
231 | if ((vfsmnt->mnt_flags & MNT_NOATIME) || | 231 | if ((vfsmnt->mnt_flags & MNT_NOATIME) || |
232 | ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) | 232 | ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) |
233 | return 0; | 233 | return 0; |
234 | 234 | ||
235 | if (vfsmnt->mnt_flags & MNT_RELATIME) { | 235 | if (vfsmnt->mnt_flags & MNT_RELATIME) { |
236 | if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) || | 236 | if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) || |
237 | (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0)) | 237 | (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0)) |
238 | return 1; | 238 | return 1; |
239 | 239 | ||
240 | return 0; | 240 | return 0; |
241 | } | 241 | } |
242 | 242 | ||
243 | now = CURRENT_TIME; | 243 | now = CURRENT_TIME; |
244 | if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum)) | 244 | if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum)) |
245 | return 0; | 245 | return 0; |
246 | else | 246 | else |
247 | return 1; | 247 | return 1; |
248 | } | 248 | } |
249 | 249 | ||
/*
 * Write a new atime for @inode into its on-disk dinode (@bh) inside a
 * small journaled transaction. Returns 0 on success or a negative errno.
 */
int ocfs2_update_inode_atime(struct inode *inode,
			     struct buffer_head *bh)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Journal-protect the dinode buffer before modifying it. */
	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * Don't use ocfs2_mark_inode_dirty() here as we don't always
	 * have i_mutex to guard against concurrent changes to other
	 * inode fields.
	 */
	inode->i_atime = CURRENT_TIME;
	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
	ocfs2_journal_dirty(handle, bh);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
287 | 287 | ||
288 | static int ocfs2_set_inode_size(handle_t *handle, | 288 | static int ocfs2_set_inode_size(handle_t *handle, |
289 | struct inode *inode, | 289 | struct inode *inode, |
290 | struct buffer_head *fe_bh, | 290 | struct buffer_head *fe_bh, |
291 | u64 new_i_size) | 291 | u64 new_i_size) |
292 | { | 292 | { |
293 | int status; | 293 | int status; |
294 | 294 | ||
295 | i_size_write(inode, new_i_size); | 295 | i_size_write(inode, new_i_size); |
296 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 296 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
297 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 297 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
298 | 298 | ||
299 | status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); | 299 | status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); |
300 | if (status < 0) { | 300 | if (status < 0) { |
301 | mlog_errno(status); | 301 | mlog_errno(status); |
302 | goto bail; | 302 | goto bail; |
303 | } | 303 | } |
304 | 304 | ||
305 | bail: | 305 | bail: |
306 | return status; | 306 | return status; |
307 | } | 307 | } |
308 | 308 | ||
309 | int ocfs2_simple_size_update(struct inode *inode, | 309 | int ocfs2_simple_size_update(struct inode *inode, |
310 | struct buffer_head *di_bh, | 310 | struct buffer_head *di_bh, |
311 | u64 new_i_size) | 311 | u64 new_i_size) |
312 | { | 312 | { |
313 | int ret; | 313 | int ret; |
314 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 314 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
315 | handle_t *handle = NULL; | 315 | handle_t *handle = NULL; |
316 | 316 | ||
317 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 317 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
318 | if (IS_ERR(handle)) { | 318 | if (IS_ERR(handle)) { |
319 | ret = PTR_ERR(handle); | 319 | ret = PTR_ERR(handle); |
320 | mlog_errno(ret); | 320 | mlog_errno(ret); |
321 | goto out; | 321 | goto out; |
322 | } | 322 | } |
323 | 323 | ||
324 | ret = ocfs2_set_inode_size(handle, inode, di_bh, | 324 | ret = ocfs2_set_inode_size(handle, inode, di_bh, |
325 | new_i_size); | 325 | new_i_size); |
326 | if (ret < 0) | 326 | if (ret < 0) |
327 | mlog_errno(ret); | 327 | mlog_errno(ret); |
328 | 328 | ||
329 | ocfs2_commit_trans(osb, handle); | 329 | ocfs2_commit_trans(osb, handle); |
330 | out: | 330 | out: |
331 | return ret; | 331 | return ret; |
332 | } | 332 | } |
333 | 333 | ||
/*
 * Copy-on-write the single cluster containing @offset if it is refcounted
 * (shared via reflink), so a later partial-cluster zeroing writes to a
 * private copy. Returns 0 on success or a negative errno.
 */
static int ocfs2_cow_file_pos(struct inode *inode,
			      struct buffer_head *fe_bh,
			      u64 offset)
{
	int status;
	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	unsigned int num_clusters = 0;
	unsigned int ext_flags = 0;

	/*
	 * If the new offset is aligned to the range of the cluster, there is
	 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
	 * CoW either.
	 */
	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
		return 0;

	status = ocfs2_get_clusters(inode, cpos, &phys,
				    &num_clusters, &ext_flags);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	/* Not shared — nothing to copy. */
	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
		goto out;

	/* CoW exactly the one cluster at cpos. */
	return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);

out:
	return status;
}
366 | 366 | ||
/*
 * First stage of a shrinking truncate: CoW the boundary cluster if it is
 * shared, zero the tail of the last cluster, and journal the new i_size
 * and timestamps into the dinode. Cluster deallocation happens later.
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
				     struct inode *inode,
				     struct buffer_head *fe_bh,
				     u64 new_i_size)
{
	int status;
	handle_t *handle;
	struct ocfs2_dinode *di;
	u64 cluster_bytes;

	/*
	 * We need to CoW the cluster contains the offset if it is reflinked
	 * since we will call ocfs2_zero_range_for_truncate later which will
	 * write "0" from offset to the end of the cluster.
	 */
	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
	if (status) {
		mlog_errno(status);
		return status;
	}

	/* TODO: This needs to actually orphan the inode in this
	 * transaction. */

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto out_commit;
	}

	/*
	 * Do this before setting i_size.
	 */
	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
					       cluster_bytes);
	if (status) {
		mlog_errno(status);
		goto out_commit;
	}

	/* Update in-core state, then mirror it into the on-disk dinode. */
	i_size_write(inode, new_i_size);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;

	di = (struct ocfs2_dinode *) fe_bh->b_data;
	di->i_size = cpu_to_le64(new_i_size);
	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);

	ocfs2_journal_dirty(handle, fe_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	return status;
}
431 | 431 | ||
/*
 * Shrink an inode down to new_i_size.  Expects a dinode buffer (di_bh)
 * validated by ocfs2_inode_lock(); growing the file through here is
 * rejected with -EINVAL.  Returns 0 or a negative error code.
 */
static int ocfs2_truncate_file(struct inode *inode,
			       struct buffer_head *di_bh,
			       u64 new_i_size)
{
	int status = 0;
	struct ocfs2_dinode *fe = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
	 * already validated it */
	fe = (struct ocfs2_dinode *) di_bh->b_data;

	trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno,
				  (unsigned long long)le64_to_cpu(fe->i_size),
				  (unsigned long long)new_i_size);

	/* In-core and on-disk sizes must agree before we change either. */
	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
			"Inode %llu, inode i_size = %lld != di "
			"i_size = %llu, i_flags = 0x%x\n",
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			i_size_read(inode),
			(unsigned long long)le64_to_cpu(fe->i_size),
			le32_to_cpu(fe->i_flags));

	if (new_i_size > le64_to_cpu(fe->i_size)) {
		trace_ocfs2_truncate_file_error(
			(unsigned long long)le64_to_cpu(fe->i_size),
			(unsigned long long)new_i_size);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	/* lets handle the simple truncate cases before doing any more
	 * cluster locking. */
	if (new_i_size == le64_to_cpu(fe->i_size))
		goto bail;

	down_write(&OCFS2_I(inode)->ip_alloc_sem);

	/* The truncated tail no longer needs its local-alloc window
	 * reservation. */
	ocfs2_resv_discard(&osb->osb_la_resmap,
			   &OCFS2_I(inode)->ip_la_data_resv);

	/*
	 * The inode lock forced other nodes to sync and drop their
	 * pages, which (correctly) happens even if we have a truncate
	 * without allocation change - ocfs2 cluster sizes can be much
	 * greater than page size, so we have to truncate them
	 * anyway.
	 */
	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
	truncate_inode_pages(inode->i_mapping, new_i_size);

	/* Inline data lives in the dinode itself - no extent tree work
	 * is needed for it. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
					       i_size_read(inode), 1);
		if (status)
			mlog_errno(status);

		goto bail_unlock_sem;
	}

	/* alright, we're going to need to do a full blown alloc size
	 * change. Orphan the inode so that recovery can complete the
	 * truncate if necessary. This does the task of marking
	 * i_size. */
	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_sem;
	}

	status = ocfs2_commit_truncate(osb, inode, di_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail_unlock_sem;
	}

	/* TODO: orphan dir cleanup here. */
bail_unlock_sem:
	up_write(&OCFS2_I(inode)->ip_alloc_sem);

bail:
	/* If the file now holds no clusters, try dropping its (empty)
	 * refcount tree as well. */
	if (!status && OCFS2_I(inode)->ip_clusters == 0)
		status = ocfs2_try_remove_refcount_tree(inode, di_bh);

	return status;
}
520 | 520 | ||
521 | /* | 521 | /* |
522 | * extend file allocation only here. | 522 | * extend file allocation only here. |
523 | * we'll update all the disk stuff, and oip->alloc_size | 523 | * we'll update all the disk stuff, and oip->alloc_size |
524 | * | 524 | * |
525 | * expect stuff to be locked, a transaction started and enough data / | 525 | * expect stuff to be locked, a transaction started and enough data / |
526 | * metadata reservations in the contexts. | 526 | * metadata reservations in the contexts. |
527 | * | 527 | * |
528 | * Will return -EAGAIN, and a reason if a restart is needed. | 528 | * Will return -EAGAIN, and a reason if a restart is needed. |
529 | * If passed in, *reason will always be set, even in error. | 529 | * If passed in, *reason will always be set, even in error. |
530 | */ | 530 | */ |
531 | int ocfs2_add_inode_data(struct ocfs2_super *osb, | 531 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
532 | struct inode *inode, | 532 | struct inode *inode, |
533 | u32 *logical_offset, | 533 | u32 *logical_offset, |
534 | u32 clusters_to_add, | 534 | u32 clusters_to_add, |
535 | int mark_unwritten, | 535 | int mark_unwritten, |
536 | struct buffer_head *fe_bh, | 536 | struct buffer_head *fe_bh, |
537 | handle_t *handle, | 537 | handle_t *handle, |
538 | struct ocfs2_alloc_context *data_ac, | 538 | struct ocfs2_alloc_context *data_ac, |
539 | struct ocfs2_alloc_context *meta_ac, | 539 | struct ocfs2_alloc_context *meta_ac, |
540 | enum ocfs2_alloc_restarted *reason_ret) | 540 | enum ocfs2_alloc_restarted *reason_ret) |
541 | { | 541 | { |
542 | int ret; | 542 | int ret; |
543 | struct ocfs2_extent_tree et; | 543 | struct ocfs2_extent_tree et; |
544 | 544 | ||
545 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh); | 545 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh); |
546 | ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset, | 546 | ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset, |
547 | clusters_to_add, mark_unwritten, | 547 | clusters_to_add, mark_unwritten, |
548 | data_ac, meta_ac, reason_ret); | 548 | data_ac, meta_ac, reason_ret); |
549 | 549 | ||
550 | return ret; | 550 | return ret; |
551 | } | 551 | } |
552 | 552 | ||
553 | static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | 553 | static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, |
554 | u32 clusters_to_add, int mark_unwritten) | 554 | u32 clusters_to_add, int mark_unwritten) |
555 | { | 555 | { |
556 | int status = 0; | 556 | int status = 0; |
557 | int restart_func = 0; | 557 | int restart_func = 0; |
558 | int credits; | 558 | int credits; |
559 | u32 prev_clusters; | 559 | u32 prev_clusters; |
560 | struct buffer_head *bh = NULL; | 560 | struct buffer_head *bh = NULL; |
561 | struct ocfs2_dinode *fe = NULL; | 561 | struct ocfs2_dinode *fe = NULL; |
562 | handle_t *handle = NULL; | 562 | handle_t *handle = NULL; |
563 | struct ocfs2_alloc_context *data_ac = NULL; | 563 | struct ocfs2_alloc_context *data_ac = NULL; |
564 | struct ocfs2_alloc_context *meta_ac = NULL; | 564 | struct ocfs2_alloc_context *meta_ac = NULL; |
565 | enum ocfs2_alloc_restarted why; | 565 | enum ocfs2_alloc_restarted why; |
566 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 566 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
567 | struct ocfs2_extent_tree et; | 567 | struct ocfs2_extent_tree et; |
568 | int did_quota = 0; | 568 | int did_quota = 0; |
569 | 569 | ||
570 | /* | 570 | /* |
571 | * This function only exists for file systems which don't | 571 | * This function only exists for file systems which don't |
572 | * support holes. | 572 | * support holes. |
573 | */ | 573 | */ |
574 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); | 574 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
575 | 575 | ||
576 | status = ocfs2_read_inode_block(inode, &bh); | 576 | status = ocfs2_read_inode_block(inode, &bh); |
577 | if (status < 0) { | 577 | if (status < 0) { |
578 | mlog_errno(status); | 578 | mlog_errno(status); |
579 | goto leave; | 579 | goto leave; |
580 | } | 580 | } |
581 | fe = (struct ocfs2_dinode *) bh->b_data; | 581 | fe = (struct ocfs2_dinode *) bh->b_data; |
582 | 582 | ||
583 | restart_all: | 583 | restart_all: |
584 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 584 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
585 | 585 | ||
586 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh); | 586 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh); |
587 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | 587 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, |
588 | &data_ac, &meta_ac); | 588 | &data_ac, &meta_ac); |
589 | if (status) { | 589 | if (status) { |
590 | mlog_errno(status); | 590 | mlog_errno(status); |
591 | goto leave; | 591 | goto leave; |
592 | } | 592 | } |
593 | 593 | ||
594 | credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, | 594 | credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, |
595 | clusters_to_add); | 595 | clusters_to_add); |
596 | handle = ocfs2_start_trans(osb, credits); | 596 | handle = ocfs2_start_trans(osb, credits); |
597 | if (IS_ERR(handle)) { | 597 | if (IS_ERR(handle)) { |
598 | status = PTR_ERR(handle); | 598 | status = PTR_ERR(handle); |
599 | handle = NULL; | 599 | handle = NULL; |
600 | mlog_errno(status); | 600 | mlog_errno(status); |
601 | goto leave; | 601 | goto leave; |
602 | } | 602 | } |
603 | 603 | ||
604 | restarted_transaction: | 604 | restarted_transaction: |
605 | trace_ocfs2_extend_allocation( | 605 | trace_ocfs2_extend_allocation( |
606 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 606 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
607 | (unsigned long long)i_size_read(inode), | 607 | (unsigned long long)i_size_read(inode), |
608 | le32_to_cpu(fe->i_clusters), clusters_to_add, | 608 | le32_to_cpu(fe->i_clusters), clusters_to_add, |
609 | why, restart_func); | 609 | why, restart_func); |
610 | 610 | ||
611 | status = dquot_alloc_space_nodirty(inode, | 611 | status = dquot_alloc_space_nodirty(inode, |
612 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); | 612 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); |
613 | if (status) | 613 | if (status) |
614 | goto leave; | 614 | goto leave; |
615 | did_quota = 1; | 615 | did_quota = 1; |
616 | 616 | ||
617 | /* reserve a write to the file entry early on - that we if we | 617 | /* reserve a write to the file entry early on - that we if we |
618 | * run out of credits in the allocation path, we can still | 618 | * run out of credits in the allocation path, we can still |
619 | * update i_size. */ | 619 | * update i_size. */ |
620 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, | 620 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, |
621 | OCFS2_JOURNAL_ACCESS_WRITE); | 621 | OCFS2_JOURNAL_ACCESS_WRITE); |
622 | if (status < 0) { | 622 | if (status < 0) { |
623 | mlog_errno(status); | 623 | mlog_errno(status); |
624 | goto leave; | 624 | goto leave; |
625 | } | 625 | } |
626 | 626 | ||
627 | prev_clusters = OCFS2_I(inode)->ip_clusters; | 627 | prev_clusters = OCFS2_I(inode)->ip_clusters; |
628 | 628 | ||
629 | status = ocfs2_add_inode_data(osb, | 629 | status = ocfs2_add_inode_data(osb, |
630 | inode, | 630 | inode, |
631 | &logical_start, | 631 | &logical_start, |
632 | clusters_to_add, | 632 | clusters_to_add, |
633 | mark_unwritten, | 633 | mark_unwritten, |
634 | bh, | 634 | bh, |
635 | handle, | 635 | handle, |
636 | data_ac, | 636 | data_ac, |
637 | meta_ac, | 637 | meta_ac, |
638 | &why); | 638 | &why); |
639 | if ((status < 0) && (status != -EAGAIN)) { | 639 | if ((status < 0) && (status != -EAGAIN)) { |
640 | if (status != -ENOSPC) | 640 | if (status != -ENOSPC) |
641 | mlog_errno(status); | 641 | mlog_errno(status); |
642 | goto leave; | 642 | goto leave; |
643 | } | 643 | } |
644 | 644 | ||
645 | ocfs2_journal_dirty(handle, bh); | 645 | ocfs2_journal_dirty(handle, bh); |
646 | 646 | ||
647 | spin_lock(&OCFS2_I(inode)->ip_lock); | 647 | spin_lock(&OCFS2_I(inode)->ip_lock); |
648 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); | 648 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); |
649 | spin_unlock(&OCFS2_I(inode)->ip_lock); | 649 | spin_unlock(&OCFS2_I(inode)->ip_lock); |
650 | /* Release unused quota reservation */ | 650 | /* Release unused quota reservation */ |
651 | dquot_free_space(inode, | 651 | dquot_free_space(inode, |
652 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); | 652 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); |
653 | did_quota = 0; | 653 | did_quota = 0; |
654 | 654 | ||
655 | if (why != RESTART_NONE && clusters_to_add) { | 655 | if (why != RESTART_NONE && clusters_to_add) { |
656 | if (why == RESTART_META) { | 656 | if (why == RESTART_META) { |
657 | restart_func = 1; | 657 | restart_func = 1; |
658 | status = 0; | 658 | status = 0; |
659 | } else { | 659 | } else { |
660 | BUG_ON(why != RESTART_TRANS); | 660 | BUG_ON(why != RESTART_TRANS); |
661 | 661 | ||
662 | /* TODO: This can be more intelligent. */ | 662 | /* TODO: This can be more intelligent. */ |
663 | credits = ocfs2_calc_extend_credits(osb->sb, | 663 | credits = ocfs2_calc_extend_credits(osb->sb, |
664 | &fe->id2.i_list, | 664 | &fe->id2.i_list, |
665 | clusters_to_add); | 665 | clusters_to_add); |
666 | status = ocfs2_extend_trans(handle, credits); | 666 | status = ocfs2_extend_trans(handle, credits); |
667 | if (status < 0) { | 667 | if (status < 0) { |
668 | /* handle still has to be committed at | 668 | /* handle still has to be committed at |
669 | * this point. */ | 669 | * this point. */ |
670 | status = -ENOMEM; | 670 | status = -ENOMEM; |
671 | mlog_errno(status); | 671 | mlog_errno(status); |
672 | goto leave; | 672 | goto leave; |
673 | } | 673 | } |
674 | goto restarted_transaction; | 674 | goto restarted_transaction; |
675 | } | 675 | } |
676 | } | 676 | } |
677 | 677 | ||
678 | trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno, | 678 | trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno, |
679 | le32_to_cpu(fe->i_clusters), | 679 | le32_to_cpu(fe->i_clusters), |
680 | (unsigned long long)le64_to_cpu(fe->i_size), | 680 | (unsigned long long)le64_to_cpu(fe->i_size), |
681 | OCFS2_I(inode)->ip_clusters, | 681 | OCFS2_I(inode)->ip_clusters, |
682 | (unsigned long long)i_size_read(inode)); | 682 | (unsigned long long)i_size_read(inode)); |
683 | 683 | ||
684 | leave: | 684 | leave: |
685 | if (status < 0 && did_quota) | 685 | if (status < 0 && did_quota) |
686 | dquot_free_space(inode, | 686 | dquot_free_space(inode, |
687 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); | 687 | ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); |
688 | if (handle) { | 688 | if (handle) { |
689 | ocfs2_commit_trans(osb, handle); | 689 | ocfs2_commit_trans(osb, handle); |
690 | handle = NULL; | 690 | handle = NULL; |
691 | } | 691 | } |
692 | if (data_ac) { | 692 | if (data_ac) { |
693 | ocfs2_free_alloc_context(data_ac); | 693 | ocfs2_free_alloc_context(data_ac); |
694 | data_ac = NULL; | 694 | data_ac = NULL; |
695 | } | 695 | } |
696 | if (meta_ac) { | 696 | if (meta_ac) { |
697 | ocfs2_free_alloc_context(meta_ac); | 697 | ocfs2_free_alloc_context(meta_ac); |
698 | meta_ac = NULL; | 698 | meta_ac = NULL; |
699 | } | 699 | } |
700 | if ((!status) && restart_func) { | 700 | if ((!status) && restart_func) { |
701 | restart_func = 0; | 701 | restart_func = 0; |
702 | goto restart_all; | 702 | goto restart_all; |
703 | } | 703 | } |
704 | brelse(bh); | 704 | brelse(bh); |
705 | bh = NULL; | 705 | bh = NULL; |
706 | 706 | ||
707 | return status; | 707 | return status; |
708 | } | 708 | } |
709 | 709 | ||
710 | /* | 710 | /* |
711 | * While a write will already be ordering the data, a truncate will not. | 711 | * While a write will already be ordering the data, a truncate will not. |
712 | * Thus, we need to explicitly order the zeroed pages. | 712 | * Thus, we need to explicitly order the zeroed pages. |
713 | */ | 713 | */ |
714 | static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) | 714 | static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) |
715 | { | 715 | { |
716 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 716 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
717 | handle_t *handle = NULL; | 717 | handle_t *handle = NULL; |
718 | int ret = 0; | 718 | int ret = 0; |
719 | 719 | ||
720 | if (!ocfs2_should_order_data(inode)) | 720 | if (!ocfs2_should_order_data(inode)) |
721 | goto out; | 721 | goto out; |
722 | 722 | ||
723 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 723 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
724 | if (IS_ERR(handle)) { | 724 | if (IS_ERR(handle)) { |
725 | ret = -ENOMEM; | 725 | ret = -ENOMEM; |
726 | mlog_errno(ret); | 726 | mlog_errno(ret); |
727 | goto out; | 727 | goto out; |
728 | } | 728 | } |
729 | 729 | ||
730 | ret = ocfs2_jbd2_file_inode(handle, inode); | 730 | ret = ocfs2_jbd2_file_inode(handle, inode); |
731 | if (ret < 0) | 731 | if (ret < 0) |
732 | mlog_errno(ret); | 732 | mlog_errno(ret); |
733 | 733 | ||
734 | out: | 734 | out: |
735 | if (ret) { | 735 | if (ret) { |
736 | if (!IS_ERR(handle)) | 736 | if (!IS_ERR(handle)) |
737 | ocfs2_commit_trans(osb, handle); | 737 | ocfs2_commit_trans(osb, handle); |
738 | handle = ERR_PTR(ret); | 738 | handle = ERR_PTR(ret); |
739 | } | 739 | } |
740 | return handle; | 740 | return handle; |
741 | } | 741 | } |
742 | 742 | ||
/* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->write_begin() and ->write_end().
 *
 * Zero the bytes [abs_from, abs_to) of a single pagecache page,
 * journalling the zeroed blocks when the inode uses ordered data mode.
 */
static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
				 u64 abs_to)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
	handle_t *handle = NULL;
	int ret = 0;
	unsigned zero_from, zero_to, block_start, block_end;

	/* Both ends must land within the same page and abs_from must be
	 * block aligned. */
	BUG_ON(abs_from >= abs_to);
	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
	BUG_ON(abs_from & (inode->i_blkbits - 1));

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Get the offsets within the page that we want to zero */
	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
	/* zero_to == 0 means abs_to is page aligned: zero to page end. */
	if (!zero_to)
		zero_to = PAGE_CACHE_SIZE;

	trace_ocfs2_write_zero_page(
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			(unsigned long long)abs_from,
			(unsigned long long)abs_to,
			index, zero_from, zero_to);

	/* We know that zero_from is block aligned */
	for (block_start = zero_from; block_start < zero_to;
	     block_start = block_end) {
		block_end = block_start + (1 << inode->i_blkbits);

		/*
		 * block_start is block-aligned. Bump it by one to force
		 * __block_write_begin and block_commit_write to zero the
		 * whole block.
		 */
		ret = __block_write_begin(page, block_start + 1, 0,
					  ocfs2_get_block);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_unlock;
		}

		/* Start the handle lazily on the first block; it can
		 * legitimately stay NULL when the inode doesn't use
		 * ordered data mode. */
		if (!handle) {
			handle = ocfs2_zero_start_ordered_transaction(inode);
			if (IS_ERR(handle)) {
				ret = PTR_ERR(handle);
				handle = NULL;
				break;
			}
		}

		/* must not update i_size! */
		ret = block_commit_write(page, block_start + 1,
					 block_start + 1);
		if (ret < 0)
			mlog_errno(ret);
		else
			ret = 0;
	}

	/* One handle covered all blocks of this page - commit it now. */
	if (handle)
		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);

out_unlock:
	unlock_page(page);
	page_cache_release(page);
out:
	return ret;
}
823 | 823 | ||
/*
 * Find the next range to zero.  We do this in terms of bytes because
 * that's what ocfs2_zero_extend() wants, and it is dealing with the
 * pagecache.  We may return multiple extents.
 *
 * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
 * needs to be zeroed.  range_start and range_end return the next zeroing
 * range.  A subsequent call should pass the previous range_end as its
 * zero_start.  If range_end is 0, there's nothing to do.
 *
 * Unwritten extents are skipped over.  Refcounted extents are CoWd.
 */
static int ocfs2_zero_extend_get_range(struct inode *inode,
				       struct buffer_head *di_bh,
				       u64 zero_start, u64 zero_end,
				       u64 *range_start, u64 *range_end)
{
	int rc = 0, needs_cow = 0;
	u32 p_cpos, zero_clusters = 0;
	u32 zero_cpos =
		zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
	unsigned int num_clusters = 0;
	unsigned int ext_flags = 0;

	/* Walk forward past holes and unwritten extents to the first
	 * cluster that actually needs zeroing. */
	while (zero_cpos < last_cpos) {
		rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
					&num_clusters, &ext_flags);
		if (rc) {
			mlog_errno(rc);
			goto out;
		}

		if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
			zero_clusters = num_clusters;
			if (ext_flags & OCFS2_EXT_REFCOUNTED)
				needs_cow = 1;
			break;
		}

		zero_cpos += num_clusters;
	}
	if (!zero_clusters) {
		/* Nothing written in the requested window; range_end == 0
		 * is the "done" signal to the caller. */
		*range_end = 0;
		goto out;
	}

	/* Grow the range over directly adjacent written extents so one
	 * call covers as much as possible. */
	while ((zero_cpos + zero_clusters) < last_cpos) {
		rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
					&p_cpos, &num_clusters,
					&ext_flags);
		if (rc) {
			mlog_errno(rc);
			goto out;
		}

		if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
			break;
		if (ext_flags & OCFS2_EXT_REFCOUNTED)
			needs_cow = 1;
		zero_clusters += num_clusters;
	}
	/* Clamp the range to the caller's window. */
	if ((zero_cpos + zero_clusters) > last_cpos)
		zero_clusters = last_cpos - zero_cpos;

	/* Break any shared (refcounted) clusters before they are
	 * written to. */
	if (needs_cow) {
		rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
					zero_clusters, UINT_MAX);
		if (rc) {
			mlog_errno(rc);
			goto out;
		}
	}

	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
					     zero_cpos + zero_clusters);

out:
	return rc;
}
905 | 905 | ||
906 | /* | 906 | /* |
907 | * Zero one range returned from ocfs2_zero_extend_get_range(). The caller | 907 | * Zero one range returned from ocfs2_zero_extend_get_range(). The caller |
908 | * has made sure that the entire range needs zeroing. | 908 | * has made sure that the entire range needs zeroing. |
909 | */ | 909 | */ |
910 | static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, | 910 | static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, |
911 | u64 range_end) | 911 | u64 range_end) |
912 | { | 912 | { |
913 | int rc = 0; | 913 | int rc = 0; |
914 | u64 next_pos; | 914 | u64 next_pos; |
915 | u64 zero_pos = range_start; | 915 | u64 zero_pos = range_start; |
916 | 916 | ||
917 | trace_ocfs2_zero_extend_range( | 917 | trace_ocfs2_zero_extend_range( |
918 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 918 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
919 | (unsigned long long)range_start, | 919 | (unsigned long long)range_start, |
920 | (unsigned long long)range_end); | 920 | (unsigned long long)range_end); |
921 | BUG_ON(range_start >= range_end); | 921 | BUG_ON(range_start >= range_end); |
922 | 922 | ||
923 | while (zero_pos < range_end) { | 923 | while (zero_pos < range_end) { |
924 | next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; | 924 | next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; |
925 | if (next_pos > range_end) | 925 | if (next_pos > range_end) |
926 | next_pos = range_end; | 926 | next_pos = range_end; |
927 | rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); | 927 | rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); |
928 | if (rc < 0) { | 928 | if (rc < 0) { |
929 | mlog_errno(rc); | 929 | mlog_errno(rc); |
930 | break; | 930 | break; |
931 | } | 931 | } |
932 | zero_pos = next_pos; | 932 | zero_pos = next_pos; |
933 | 933 | ||
934 | /* | 934 | /* |
935 | * Very large extends have the potential to lock up | 935 | * Very large extends have the potential to lock up |
936 | * the cpu for extended periods of time. | 936 | * the cpu for extended periods of time. |
937 | */ | 937 | */ |
938 | cond_resched(); | 938 | cond_resched(); |
939 | } | 939 | } |
940 | 940 | ||
941 | return rc; | 941 | return rc; |
942 | } | 942 | } |
943 | 943 | ||
944 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, | 944 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, |
945 | loff_t zero_to_size) | 945 | loff_t zero_to_size) |
946 | { | 946 | { |
947 | int ret = 0; | 947 | int ret = 0; |
948 | u64 zero_start, range_start = 0, range_end = 0; | 948 | u64 zero_start, range_start = 0, range_end = 0; |
949 | struct super_block *sb = inode->i_sb; | 949 | struct super_block *sb = inode->i_sb; |
950 | 950 | ||
951 | zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); | 951 | zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); |
952 | trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno, | 952 | trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno, |
953 | (unsigned long long)zero_start, | 953 | (unsigned long long)zero_start, |
954 | (unsigned long long)i_size_read(inode)); | 954 | (unsigned long long)i_size_read(inode)); |
955 | while (zero_start < zero_to_size) { | 955 | while (zero_start < zero_to_size) { |
956 | ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, | 956 | ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, |
957 | zero_to_size, | 957 | zero_to_size, |
958 | &range_start, | 958 | &range_start, |
959 | &range_end); | 959 | &range_end); |
960 | if (ret) { | 960 | if (ret) { |
961 | mlog_errno(ret); | 961 | mlog_errno(ret); |
962 | break; | 962 | break; |
963 | } | 963 | } |
964 | if (!range_end) | 964 | if (!range_end) |
965 | break; | 965 | break; |
966 | /* Trim the ends */ | 966 | /* Trim the ends */ |
967 | if (range_start < zero_start) | 967 | if (range_start < zero_start) |
968 | range_start = zero_start; | 968 | range_start = zero_start; |
969 | if (range_end > zero_to_size) | 969 | if (range_end > zero_to_size) |
970 | range_end = zero_to_size; | 970 | range_end = zero_to_size; |
971 | 971 | ||
972 | ret = ocfs2_zero_extend_range(inode, range_start, | 972 | ret = ocfs2_zero_extend_range(inode, range_start, |
973 | range_end); | 973 | range_end); |
974 | if (ret) { | 974 | if (ret) { |
975 | mlog_errno(ret); | 975 | mlog_errno(ret); |
976 | break; | 976 | break; |
977 | } | 977 | } |
978 | zero_start = range_end; | 978 | zero_start = range_end; |
979 | } | 979 | } |
980 | 980 | ||
981 | return ret; | 981 | return ret; |
982 | } | 982 | } |
983 | 983 | ||
984 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, | 984 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, |
985 | u64 new_i_size, u64 zero_to) | 985 | u64 new_i_size, u64 zero_to) |
986 | { | 986 | { |
987 | int ret; | 987 | int ret; |
988 | u32 clusters_to_add; | 988 | u32 clusters_to_add; |
989 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 989 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
990 | 990 | ||
991 | /* | 991 | /* |
992 | * Only quota files call this without a bh, and they can't be | 992 | * Only quota files call this without a bh, and they can't be |
993 | * refcounted. | 993 | * refcounted. |
994 | */ | 994 | */ |
995 | BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | 995 | BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); |
996 | BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); | 996 | BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); |
997 | 997 | ||
998 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); | 998 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); |
999 | if (clusters_to_add < oi->ip_clusters) | 999 | if (clusters_to_add < oi->ip_clusters) |
1000 | clusters_to_add = 0; | 1000 | clusters_to_add = 0; |
1001 | else | 1001 | else |
1002 | clusters_to_add -= oi->ip_clusters; | 1002 | clusters_to_add -= oi->ip_clusters; |
1003 | 1003 | ||
1004 | if (clusters_to_add) { | 1004 | if (clusters_to_add) { |
1005 | ret = __ocfs2_extend_allocation(inode, oi->ip_clusters, | 1005 | ret = __ocfs2_extend_allocation(inode, oi->ip_clusters, |
1006 | clusters_to_add, 0); | 1006 | clusters_to_add, 0); |
1007 | if (ret) { | 1007 | if (ret) { |
1008 | mlog_errno(ret); | 1008 | mlog_errno(ret); |
1009 | goto out; | 1009 | goto out; |
1010 | } | 1010 | } |
1011 | } | 1011 | } |
1012 | 1012 | ||
1013 | /* | 1013 | /* |
1014 | * Call this even if we don't add any clusters to the tree. We | 1014 | * Call this even if we don't add any clusters to the tree. We |
1015 | * still need to zero the area between the old i_size and the | 1015 | * still need to zero the area between the old i_size and the |
1016 | * new i_size. | 1016 | * new i_size. |
1017 | */ | 1017 | */ |
1018 | ret = ocfs2_zero_extend(inode, di_bh, zero_to); | 1018 | ret = ocfs2_zero_extend(inode, di_bh, zero_to); |
1019 | if (ret < 0) | 1019 | if (ret < 0) |
1020 | mlog_errno(ret); | 1020 | mlog_errno(ret); |
1021 | 1021 | ||
1022 | out: | 1022 | out: |
1023 | return ret; | 1023 | return ret; |
1024 | } | 1024 | } |
1025 | 1025 | ||
/*
 * Grow the file out to new_i_size.  Allocation is done under
 * ip_alloc_sem; the disk i_size is updated afterwards via
 * ocfs2_simple_size_update().  Callers are expected to hold i_mutex
 * (see the comment below).  Returns 0 or a negative error.
 */
static int ocfs2_extend_file(struct inode *inode,
			     struct buffer_head *di_bh,
			     u64 new_i_size)
{
	int ret = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	BUG_ON(!di_bh);

	/* setattr sometimes calls us like this. */
	if (new_i_size == 0)
		goto out;

	/* Nothing to do if the size is unchanged; shrinking is a bug
	 * here — that path goes through ocfs2_truncate_file(). */
	if (i_size_read(inode) == new_i_size)
		goto out;
	BUG_ON(new_i_size < i_size_read(inode));

	/*
	 * The alloc sem blocks people in read/write from reading our
	 * allocation until we're done changing it. We depend on
	 * i_mutex to block other extend/truncate calls while we're
	 * here.  We even have to hold it for sparse files because there
	 * might be some tail zeroing.
	 */
	down_write(&oi->ip_alloc_sem);

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/*
		 * We can optimize small extends by keeping the inodes
		 * inline data.
		 */
		if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
			up_write(&oi->ip_alloc_sem);
			goto out_update_size;
		}

		/* Too big for inline storage: push the data out to
		 * extents before allocating. */
		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
		if (ret) {
			up_write(&oi->ip_alloc_sem);
			mlog_errno(ret);
			goto out;
		}
	}

	/* Sparse filesystems only need tail zeroing; non-sparse ones
	 * must also allocate clusters up to the new size. */
	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
	else
		ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
					    new_i_size);

	up_write(&oi->ip_alloc_sem);

	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

out_update_size:
	/* Record the new i_size on disk. */
	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
	if (ret < 0)
		mlog_errno(ret);

out:
	return ret;
}
1091 | 1091 | ||
/*
 * ocfs2 ->setattr.  Lock ordering here is rw lock -> inode (cluster)
 * lock -> journal transaction; size changes additionally take the rw
 * lock exclusively so no cluster node can start new I/O against the
 * old size.  Quota structures are pre-acquired before the transaction
 * starts to avoid lock-ordering problems inside dquot_transfer().
 */
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
{
	int status = 0, size_change;
	struct inode *inode = dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	struct buffer_head *bh = NULL;
	handle_t *handle = NULL;
	struct dquot *transfer_to[MAXQUOTAS] = { };
	int qtype;

	trace_ocfs2_setattr(inode, dentry,
			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
			    dentry->d_name.len, dentry->d_name.name,
			    attr->ia_valid, attr->ia_mode,
			    attr->ia_uid, attr->ia_gid);

	/* ensuring we don't even attempt to truncate a symlink */
	if (S_ISLNK(inode->i_mode))
		attr->ia_valid &= ~ATTR_SIZE;

#define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
			   | ATTR_GID | ATTR_UID | ATTR_MODE)
	/* Nothing we handle was requested: success, no work. */
	if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
		return 0;

	status = inode_change_ok(inode, attr);
	if (status)
		return status;

	if (is_quota_modification(inode, attr))
		dquot_initialize(inode);
	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
	if (size_change) {
		/* Exclusive rw lock: stop new cluster I/O during the
		 * size change. */
		status = ocfs2_rw_lock(inode, 1);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	status = ocfs2_inode_lock(inode, &bh, 1);
	if (status < 0) {
		if (status != -ENOENT)
			mlog_errno(status);
		goto bail_unlock_rw;
	}

	if (size_change && attr->ia_size != i_size_read(inode)) {
		status = inode_newsize_ok(inode, attr->ia_size);
		if (status)
			goto bail_unlock;

		/* Drain in-flight direct I/O before changing the size;
		 * done here (not in the VFS) so it happens under our
		 * cluster locks, which prevent new dio references. */
		inode_dio_wait(inode);

		if (i_size_read(inode) > attr->ia_size) {
			/* Shrinking. */
			if (ocfs2_should_order_data(inode)) {
				status = ocfs2_begin_ordered_truncate(inode,
								      attr->ia_size);
				if (status)
					goto bail_unlock;
			}
			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
		} else
			status = ocfs2_extend_file(inode, bh, attr->ia_size);
		if (status < 0) {
			/* Any failure here is reported as -ENOSPC. */
			if (status != -ENOSPC)
				mlog_errno(status);
			status = -ENOSPC;
			goto bail_unlock;
		}
	}

	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
		/*
		 * Gather pointers to quota structures so that allocation /
		 * freeing of quota structures happens here and not inside
		 * dquot_transfer() where we have problems with lock ordering
		 */
		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
			transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
						      USRQUOTA);
			if (!transfer_to[USRQUOTA]) {
				status = -ESRCH;
				goto bail_unlock;
			}
		}
		if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
			transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
						      GRPQUOTA);
			if (!transfer_to[GRPQUOTA]) {
				status = -ESRCH;
				goto bail_unlock;
			}
		}
		/* Owner change needs extra journal credits for the
		 * quota transfer. */
		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
					   2 * ocfs2_quota_trans_credits(sb));
		if (IS_ERR(handle)) {
			status = PTR_ERR(handle);
			mlog_errno(status);
			goto bail_unlock;
		}
		status = __dquot_transfer(inode, transfer_to);
		if (status < 0)
			goto bail_commit;
	} else {
		handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
		if (IS_ERR(handle)) {
			status = PTR_ERR(handle);
			mlog_errno(status);
			goto bail_unlock;
		}
	}

	/*
	 * This will intentionally not wind up calling truncate_setsize(),
	 * since all the work for a size change has been done above.
	 * Otherwise, we could get into problems with truncate as
	 * ip_alloc_sem is used there to protect against i_size
	 * changes.
	 *
	 * XXX: this means the conditional below can probably be removed.
	 */
	if ((attr->ia_valid & ATTR_SIZE) &&
	    attr->ia_size != i_size_read(inode)) {
		status = vmtruncate(inode, attr->ia_size);
		if (status) {
			mlog_errno(status);
			goto bail_commit;
		}
	}

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);

	status = ocfs2_mark_inode_dirty(handle, inode, bh);
	if (status < 0)
		mlog_errno(status);

bail_commit:
	ocfs2_commit_trans(osb, handle);
bail_unlock:
	ocfs2_inode_unlock(inode, 1);
bail_unlock_rw:
	if (size_change)
		ocfs2_rw_unlock(inode, 1);
bail:
	brelse(bh);

	/* Release quota pointers in case we acquired them */
	for (qtype = 0; qtype < MAXQUOTAS; qtype++)
		dqput(transfer_to[qtype]);

	/* ACLs are updated outside the cluster locks/transaction;
	 * ocfs2_acl_chmod() takes its own locks. */
	if (!status && attr->ia_valid & ATTR_MODE) {
		status = ocfs2_acl_chmod(inode);
		if (status < 0)
			mlog_errno(status);
	}

	return status;
}
1256 | 1258 | ||
1257 | int ocfs2_getattr(struct vfsmount *mnt, | 1259 | int ocfs2_getattr(struct vfsmount *mnt, |
1258 | struct dentry *dentry, | 1260 | struct dentry *dentry, |
1259 | struct kstat *stat) | 1261 | struct kstat *stat) |
1260 | { | 1262 | { |
1261 | struct inode *inode = dentry->d_inode; | 1263 | struct inode *inode = dentry->d_inode; |
1262 | struct super_block *sb = dentry->d_inode->i_sb; | 1264 | struct super_block *sb = dentry->d_inode->i_sb; |
1263 | struct ocfs2_super *osb = sb->s_fs_info; | 1265 | struct ocfs2_super *osb = sb->s_fs_info; |
1264 | int err; | 1266 | int err; |
1265 | 1267 | ||
1266 | err = ocfs2_inode_revalidate(dentry); | 1268 | err = ocfs2_inode_revalidate(dentry); |
1267 | if (err) { | 1269 | if (err) { |
1268 | if (err != -ENOENT) | 1270 | if (err != -ENOENT) |
1269 | mlog_errno(err); | 1271 | mlog_errno(err); |
1270 | goto bail; | 1272 | goto bail; |
1271 | } | 1273 | } |
1272 | 1274 | ||
1273 | generic_fillattr(inode, stat); | 1275 | generic_fillattr(inode, stat); |
1274 | 1276 | ||
1275 | /* We set the blksize from the cluster size for performance */ | 1277 | /* We set the blksize from the cluster size for performance */ |
1276 | stat->blksize = osb->s_clustersize; | 1278 | stat->blksize = osb->s_clustersize; |
1277 | 1279 | ||
1278 | bail: | 1280 | bail: |
1279 | return err; | 1281 | return err; |
1280 | } | 1282 | } |
1281 | 1283 | ||
1282 | int ocfs2_permission(struct inode *inode, int mask) | 1284 | int ocfs2_permission(struct inode *inode, int mask) |
1283 | { | 1285 | { |
1284 | int ret; | 1286 | int ret; |
1285 | 1287 | ||
1286 | if (mask & MAY_NOT_BLOCK) | 1288 | if (mask & MAY_NOT_BLOCK) |
1287 | return -ECHILD; | 1289 | return -ECHILD; |
1288 | 1290 | ||
1289 | ret = ocfs2_inode_lock(inode, NULL, 0); | 1291 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1290 | if (ret) { | 1292 | if (ret) { |
1291 | if (ret != -ENOENT) | 1293 | if (ret != -ENOENT) |
1292 | mlog_errno(ret); | 1294 | mlog_errno(ret); |
1293 | goto out; | 1295 | goto out; |
1294 | } | 1296 | } |
1295 | 1297 | ||
1296 | ret = generic_permission(inode, mask); | 1298 | ret = generic_permission(inode, mask); |
1297 | 1299 | ||
1298 | ocfs2_inode_unlock(inode, 0); | 1300 | ocfs2_inode_unlock(inode, 0); |
1299 | out: | 1301 | out: |
1300 | return ret; | 1302 | return ret; |
1301 | } | 1303 | } |
1302 | 1304 | ||
1303 | static int __ocfs2_write_remove_suid(struct inode *inode, | 1305 | static int __ocfs2_write_remove_suid(struct inode *inode, |
1304 | struct buffer_head *bh) | 1306 | struct buffer_head *bh) |
1305 | { | 1307 | { |
1306 | int ret; | 1308 | int ret; |
1307 | handle_t *handle; | 1309 | handle_t *handle; |
1308 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1310 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1309 | struct ocfs2_dinode *di; | 1311 | struct ocfs2_dinode *di; |
1310 | 1312 | ||
1311 | trace_ocfs2_write_remove_suid( | 1313 | trace_ocfs2_write_remove_suid( |
1312 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1314 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1313 | inode->i_mode); | 1315 | inode->i_mode); |
1314 | 1316 | ||
1315 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 1317 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
1316 | if (IS_ERR(handle)) { | 1318 | if (IS_ERR(handle)) { |
1317 | ret = PTR_ERR(handle); | 1319 | ret = PTR_ERR(handle); |
1318 | mlog_errno(ret); | 1320 | mlog_errno(ret); |
1319 | goto out; | 1321 | goto out; |
1320 | } | 1322 | } |
1321 | 1323 | ||
1322 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, | 1324 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh, |
1323 | OCFS2_JOURNAL_ACCESS_WRITE); | 1325 | OCFS2_JOURNAL_ACCESS_WRITE); |
1324 | if (ret < 0) { | 1326 | if (ret < 0) { |
1325 | mlog_errno(ret); | 1327 | mlog_errno(ret); |
1326 | goto out_trans; | 1328 | goto out_trans; |
1327 | } | 1329 | } |
1328 | 1330 | ||
1329 | inode->i_mode &= ~S_ISUID; | 1331 | inode->i_mode &= ~S_ISUID; |
1330 | if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP)) | 1332 | if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP)) |
1331 | inode->i_mode &= ~S_ISGID; | 1333 | inode->i_mode &= ~S_ISGID; |
1332 | 1334 | ||
1333 | di = (struct ocfs2_dinode *) bh->b_data; | 1335 | di = (struct ocfs2_dinode *) bh->b_data; |
1334 | di->i_mode = cpu_to_le16(inode->i_mode); | 1336 | di->i_mode = cpu_to_le16(inode->i_mode); |
1335 | 1337 | ||
1336 | ocfs2_journal_dirty(handle, bh); | 1338 | ocfs2_journal_dirty(handle, bh); |
1337 | 1339 | ||
1338 | out_trans: | 1340 | out_trans: |
1339 | ocfs2_commit_trans(osb, handle); | 1341 | ocfs2_commit_trans(osb, handle); |
1340 | out: | 1342 | out: |
1341 | return ret; | 1343 | return ret; |
1342 | } | 1344 | } |
1343 | 1345 | ||
1344 | /* | 1346 | /* |
1345 | * Will look for holes and unwritten extents in the range starting at | 1347 | * Will look for holes and unwritten extents in the range starting at |
1346 | * pos for count bytes (inclusive). | 1348 | * pos for count bytes (inclusive). |
1347 | */ | 1349 | */ |
1348 | static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos, | 1350 | static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos, |
1349 | size_t count) | 1351 | size_t count) |
1350 | { | 1352 | { |
1351 | int ret = 0; | 1353 | int ret = 0; |
1352 | unsigned int extent_flags; | 1354 | unsigned int extent_flags; |
1353 | u32 cpos, clusters, extent_len, phys_cpos; | 1355 | u32 cpos, clusters, extent_len, phys_cpos; |
1354 | struct super_block *sb = inode->i_sb; | 1356 | struct super_block *sb = inode->i_sb; |
1355 | 1357 | ||
1356 | cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; | 1358 | cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; |
1357 | clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; | 1359 | clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; |
1358 | 1360 | ||
1359 | while (clusters) { | 1361 | while (clusters) { |
1360 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, | 1362 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, |
1361 | &extent_flags); | 1363 | &extent_flags); |
1362 | if (ret < 0) { | 1364 | if (ret < 0) { |
1363 | mlog_errno(ret); | 1365 | mlog_errno(ret); |
1364 | goto out; | 1366 | goto out; |
1365 | } | 1367 | } |
1366 | 1368 | ||
1367 | if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) { | 1369 | if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) { |
1368 | ret = 1; | 1370 | ret = 1; |
1369 | break; | 1371 | break; |
1370 | } | 1372 | } |
1371 | 1373 | ||
1372 | if (extent_len > clusters) | 1374 | if (extent_len > clusters) |
1373 | extent_len = clusters; | 1375 | extent_len = clusters; |
1374 | 1376 | ||
1375 | clusters -= extent_len; | 1377 | clusters -= extent_len; |
1376 | cpos += extent_len; | 1378 | cpos += extent_len; |
1377 | } | 1379 | } |
1378 | out: | 1380 | out: |
1379 | return ret; | 1381 | return ret; |
1380 | } | 1382 | } |
1381 | 1383 | ||
1382 | static int ocfs2_write_remove_suid(struct inode *inode) | 1384 | static int ocfs2_write_remove_suid(struct inode *inode) |
1383 | { | 1385 | { |
1384 | int ret; | 1386 | int ret; |
1385 | struct buffer_head *bh = NULL; | 1387 | struct buffer_head *bh = NULL; |
1386 | 1388 | ||
1387 | ret = ocfs2_read_inode_block(inode, &bh); | 1389 | ret = ocfs2_read_inode_block(inode, &bh); |
1388 | if (ret < 0) { | 1390 | if (ret < 0) { |
1389 | mlog_errno(ret); | 1391 | mlog_errno(ret); |
1390 | goto out; | 1392 | goto out; |
1391 | } | 1393 | } |
1392 | 1394 | ||
1393 | ret = __ocfs2_write_remove_suid(inode, bh); | 1395 | ret = __ocfs2_write_remove_suid(inode, bh); |
1394 | out: | 1396 | out: |
1395 | brelse(bh); | 1397 | brelse(bh); |
1396 | return ret; | 1398 | return ret; |
1397 | } | 1399 | } |
1398 | 1400 | ||
1399 | /* | 1401 | /* |
1400 | * Allocate enough extents to cover the region starting at byte offset | 1402 | * Allocate enough extents to cover the region starting at byte offset |
1401 | * start for len bytes. Existing extents are skipped, any extents | 1403 | * start for len bytes. Existing extents are skipped, any extents |
1402 | * added are marked as "unwritten". | 1404 | * added are marked as "unwritten". |
1403 | */ | 1405 | */ |
/*
 * Reserve clusters covering the byte range [start, start + len) on @inode.
 * Regions that already have an allocation are skipped; newly allocated
 * extents are marked unwritten so they read back as zeroes until written.
 * Returns 0 on success or a negative errno (-ENOSPC is passed through
 * silently as an expected, caller-visible condition).
 */
static int ocfs2_allocate_unwritten_extents(struct inode *inode,
					    u64 start, u64 len)
{
	int ret;
	u32 cpos, phys_cpos, clusters, alloc_size;
	u64 end = start + len;
	struct buffer_head *di_bh = NULL;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_read_inode_block(inode, &di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * Nothing to do if the requested reservation range
		 * fits within the inode.
		 */
		if (ocfs2_size_fits_inline_data(di_bh, end))
			goto out;

		/* Too large for inline data - switch to an extent list. */
		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * We consider both start and len to be inclusive.
	 */
	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
	clusters -= cpos;

	/* Walk the range one extent/hole at a time, filling holes. */
	while (clusters) {
		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
					 &alloc_size, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * Hole or existing extent len can be arbitrary, so
		 * cap it to our own allocation request.
		 */
		if (alloc_size > clusters)
			alloc_size = clusters;

		if (phys_cpos) {
			/*
			 * We already have an allocation at this
			 * region so we can safely skip it.
			 */
			goto next;
		}

		/* Hole: allocate alloc_size unwritten clusters at cpos. */
		ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
		if (ret) {
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}

next:
		cpos += alloc_size;
		clusters -= alloc_size;
	}

	ret = 0;
out:

	brelse(di_bh);
	return ret;
}
1481 | 1483 | ||
1482 | /* | 1484 | /* |
1483 | * Truncate a byte range, avoiding pages within partial clusters. This | 1485 | * Truncate a byte range, avoiding pages within partial clusters. This |
1484 | * preserves those pages for the zeroing code to write to. | 1486 | * preserves those pages for the zeroing code to write to. |
1485 | */ | 1487 | */ |
1486 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | 1488 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, |
1487 | u64 byte_len) | 1489 | u64 byte_len) |
1488 | { | 1490 | { |
1489 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1491 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1490 | loff_t start, end; | 1492 | loff_t start, end; |
1491 | struct address_space *mapping = inode->i_mapping; | 1493 | struct address_space *mapping = inode->i_mapping; |
1492 | 1494 | ||
1493 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | 1495 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); |
1494 | end = byte_start + byte_len; | 1496 | end = byte_start + byte_len; |
1495 | end = end & ~(osb->s_clustersize - 1); | 1497 | end = end & ~(osb->s_clustersize - 1); |
1496 | 1498 | ||
1497 | if (start < end) { | 1499 | if (start < end) { |
1498 | unmap_mapping_range(mapping, start, end - start, 0); | 1500 | unmap_mapping_range(mapping, start, end - start, 0); |
1499 | truncate_inode_pages_range(mapping, start, end - 1); | 1501 | truncate_inode_pages_range(mapping, start, end - 1); |
1500 | } | 1502 | } |
1501 | } | 1503 | } |
1502 | 1504 | ||
1503 | static int ocfs2_zero_partial_clusters(struct inode *inode, | 1505 | static int ocfs2_zero_partial_clusters(struct inode *inode, |
1504 | u64 start, u64 len) | 1506 | u64 start, u64 len) |
1505 | { | 1507 | { |
1506 | int ret = 0; | 1508 | int ret = 0; |
1507 | u64 tmpend, end = start + len; | 1509 | u64 tmpend, end = start + len; |
1508 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1510 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1509 | unsigned int csize = osb->s_clustersize; | 1511 | unsigned int csize = osb->s_clustersize; |
1510 | handle_t *handle; | 1512 | handle_t *handle; |
1511 | 1513 | ||
1512 | /* | 1514 | /* |
1513 | * The "start" and "end" values are NOT necessarily part of | 1515 | * The "start" and "end" values are NOT necessarily part of |
1514 | * the range whose allocation is being deleted. Rather, this | 1516 | * the range whose allocation is being deleted. Rather, this |
1515 | * is what the user passed in with the request. We must zero | 1517 | * is what the user passed in with the request. We must zero |
1516 | * partial clusters here. There's no need to worry about | 1518 | * partial clusters here. There's no need to worry about |
1517 | * physical allocation - the zeroing code knows to skip holes. | 1519 | * physical allocation - the zeroing code knows to skip holes. |
1518 | */ | 1520 | */ |
1519 | trace_ocfs2_zero_partial_clusters( | 1521 | trace_ocfs2_zero_partial_clusters( |
1520 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1522 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1521 | (unsigned long long)start, (unsigned long long)end); | 1523 | (unsigned long long)start, (unsigned long long)end); |
1522 | 1524 | ||
1523 | /* | 1525 | /* |
1524 | * If both edges are on a cluster boundary then there's no | 1526 | * If both edges are on a cluster boundary then there's no |
1525 | * zeroing required as the region is part of the allocation to | 1527 | * zeroing required as the region is part of the allocation to |
1526 | * be truncated. | 1528 | * be truncated. |
1527 | */ | 1529 | */ |
1528 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | 1530 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) |
1529 | goto out; | 1531 | goto out; |
1530 | 1532 | ||
1531 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 1533 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
1532 | if (IS_ERR(handle)) { | 1534 | if (IS_ERR(handle)) { |
1533 | ret = PTR_ERR(handle); | 1535 | ret = PTR_ERR(handle); |
1534 | mlog_errno(ret); | 1536 | mlog_errno(ret); |
1535 | goto out; | 1537 | goto out; |
1536 | } | 1538 | } |
1537 | 1539 | ||
1538 | /* | 1540 | /* |
1539 | * We want to get the byte offset of the end of the 1st cluster. | 1541 | * We want to get the byte offset of the end of the 1st cluster. |
1540 | */ | 1542 | */ |
1541 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | 1543 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); |
1542 | if (tmpend > end) | 1544 | if (tmpend > end) |
1543 | tmpend = end; | 1545 | tmpend = end; |
1544 | 1546 | ||
1545 | trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, | 1547 | trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, |
1546 | (unsigned long long)tmpend); | 1548 | (unsigned long long)tmpend); |
1547 | 1549 | ||
1548 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | 1550 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); |
1549 | if (ret) | 1551 | if (ret) |
1550 | mlog_errno(ret); | 1552 | mlog_errno(ret); |
1551 | 1553 | ||
1552 | if (tmpend < end) { | 1554 | if (tmpend < end) { |
1553 | /* | 1555 | /* |
1554 | * This may make start and end equal, but the zeroing | 1556 | * This may make start and end equal, but the zeroing |
1555 | * code will skip any work in that case so there's no | 1557 | * code will skip any work in that case so there's no |
1556 | * need to catch it up here. | 1558 | * need to catch it up here. |
1557 | */ | 1559 | */ |
1558 | start = end & ~(osb->s_clustersize - 1); | 1560 | start = end & ~(osb->s_clustersize - 1); |
1559 | 1561 | ||
1560 | trace_ocfs2_zero_partial_clusters_range2( | 1562 | trace_ocfs2_zero_partial_clusters_range2( |
1561 | (unsigned long long)start, (unsigned long long)end); | 1563 | (unsigned long long)start, (unsigned long long)end); |
1562 | 1564 | ||
1563 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | 1565 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); |
1564 | if (ret) | 1566 | if (ret) |
1565 | mlog_errno(ret); | 1567 | mlog_errno(ret); |
1566 | } | 1568 | } |
1567 | 1569 | ||
1568 | ocfs2_commit_trans(osb, handle); | 1570 | ocfs2_commit_trans(osb, handle); |
1569 | out: | 1571 | out: |
1570 | return ret; | 1572 | return ret; |
1571 | } | 1573 | } |
1572 | 1574 | ||
1573 | static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos) | 1575 | static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos) |
1574 | { | 1576 | { |
1575 | int i; | 1577 | int i; |
1576 | struct ocfs2_extent_rec *rec = NULL; | 1578 | struct ocfs2_extent_rec *rec = NULL; |
1577 | 1579 | ||
1578 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | 1580 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { |
1579 | 1581 | ||
1580 | rec = &el->l_recs[i]; | 1582 | rec = &el->l_recs[i]; |
1581 | 1583 | ||
1582 | if (le32_to_cpu(rec->e_cpos) < pos) | 1584 | if (le32_to_cpu(rec->e_cpos) < pos) |
1583 | break; | 1585 | break; |
1584 | } | 1586 | } |
1585 | 1587 | ||
1586 | return i; | 1588 | return i; |
1587 | } | 1589 | } |
1588 | 1590 | ||
/*
 * Helper to calculate the punching pos and length in one run, we handle the
 * following three cases in order:
 *
 * - remove the entire record
 * - remove a partial record
 * - no record needs to be removed (hole-punching completed)
 *
 * On return, *trunc_cpos/*trunc_len describe the cluster range to remove
 * and *blkno its starting physical block; *trunc_end is moved left to
 * where the next iteration should resume; *done is set non-zero once
 * nothing remains to be punched.
 */
static void ocfs2_calc_trunc_pos(struct inode *inode,
				 struct ocfs2_extent_list *el,
				 struct ocfs2_extent_rec *rec,
				 u32 trunc_start, u32 *trunc_cpos,
				 u32 *trunc_len, u32 *trunc_end,
				 u64 *blkno, int *done)
{
	int ret = 0;
	u32 coff, range;

	/* Cluster offset just past the end of this extent record. */
	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);

	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
		/*
		 * remove an entire extent record.
		 */
		*trunc_cpos = le32_to_cpu(rec->e_cpos);
		/*
		 * Skip holes if any.
		 */
		if (range < *trunc_end)
			*trunc_end = range;
		*trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
		*blkno = le64_to_cpu(rec->e_blkno);
		/* Next pass resumes at the start of this record. */
		*trunc_end = le32_to_cpu(rec->e_cpos);
	} else if (range > trunc_start) {
		/*
		 * remove a partial extent record, which means we're
		 * removing the last extent record.
		 */
		*trunc_cpos = trunc_start;
		/*
		 * skip hole if any.
		 */
		if (range < *trunc_end)
			*trunc_end = range;
		*trunc_len = *trunc_end - trunc_start;
		/* Physical block of the first cluster being removed. */
		coff = trunc_start - le32_to_cpu(rec->e_cpos);
		*blkno = le64_to_cpu(rec->e_blkno) +
				ocfs2_clusters_to_blocks(inode->i_sb, coff);
		*trunc_end = trunc_start;
	} else {
		/*
		 * It may have two following possibilities:
		 *
		 * - last record has been removed
		 * - trunc_start was within a hole
		 *
		 * both two cases mean the completion of hole punching.
		 */
		ret = 1;
	}

	*done = ret;
}
1652 | 1654 | ||
/*
 * Punch a hole: remove the allocation backing the byte range
 * [byte_start, byte_start + byte_len) from @inode. Partial edge
 * clusters are zeroed rather than deallocated; whole clusters are
 * removed from the extent tree right-to-left and queued for
 * deallocation. Caller holds the locks taken in
 * __ocfs2_change_file_space() (i_mutex, rw lock, inode cluster lock,
 * ip_alloc_sem) and passes the inode block in @di_bh.
 */
static int ocfs2_remove_inode_range(struct inode *inode,
				    struct buffer_head *di_bh, u64 byte_start,
				    u64 byte_len)
{
	int ret = 0, flags = 0, done = 0, i;
	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
	u32 cluster_in_el;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct address_space *mapping = inode->i_mapping;
	struct ocfs2_extent_tree et;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_list *el = NULL;
	struct ocfs2_extent_rec *rec = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);

	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
	ocfs2_init_dealloc_ctxt(&dealloc);

	trace_ocfs2_remove_inode_range(
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
		(unsigned long long)byte_start,
		(unsigned long long)byte_len);

	if (byte_len == 0)
		return 0;

	/* Inline-data inodes have no extents; truncate within the dinode. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
					    byte_start + byte_len, 0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		/*
		 * There's no need to get fancy with the page cache
		 * truncate of an inline-data inode. We're talking
		 * about less than a page here, which will be cached
		 * in the dinode buffer anyway.
		 */
		unmap_mapping_range(mapping, 0, 0, 0);
		truncate_inode_pages(mapping, 0);
		goto out;
	}

	/*
	 * For reflinks, we may need to CoW 2 clusters which might be
	 * partially zero'd later, if hole's start and end offset were
	 * within one cluster(means is not exactly aligned to clustersize).
	 */

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {

		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* First/last whole clusters inside the punched range. */
	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
	cluster_in_el = trunc_end;

	/* Zero the user-visible bytes in the partial edge clusters. */
	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	path = ocfs2_new_path_from_et(&et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Remove extent records right-to-left until the range is gone. */
	while (trunc_end > trunc_start) {

		ret = ocfs2_find_path(INODE_CACHE(inode), path,
				      cluster_in_el);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		el = path_leaf_el(path);

		i = ocfs2_find_rec(el, trunc_end);
		/*
		 * Need to go to previous extent block.
		 */
		if (i < 0) {
			if (path->p_tree_depth == 0)
				break;

			ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
							    path,
							    &cluster_in_el);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/*
			 * We've reached the leftmost extent block,
			 * it's safe to leave.
			 */
			if (cluster_in_el == 0)
				break;

			/*
			 * The 'pos' searched for previous extent block is
			 * always one cluster less than actual trunc_end.
			 */
			trunc_end = cluster_in_el + 1;

			ocfs2_reinit_path(path, 1);

			continue;

		} else
			rec = &el->l_recs[i];

		/* Work out how much of this record to punch. */
		ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
				     &trunc_len, &trunc_end, &blkno, &done);
		if (done)
			break;

		flags = rec->e_flags;
		phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);

		ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
					       phys_cpos, trunc_len, flags,
					       &dealloc, refcount_loc);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		cluster_in_el = trunc_end;

		ocfs2_reinit_path(path, 1);
	}

	/* Drop page-cache pages for the whole clusters we removed. */
	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
	/* Flush the truncate log and free the clusters queued above. */
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}
1813 | 1815 | ||
/*
 * Parts of this function taken from xfs_change_file_space()
 *
 * Common implementation for the OCFS2_IOC_(UN)RESVSP* ioctls and
 * fallocate: reserve (as unwritten extents) or remove the allocation
 * described by @sr. Lock ordering is i_mutex -> rw cluster lock ->
 * inode cluster lock -> ip_alloc_sem. @change_size non-zero means
 * i_size may grow to cover the reserved range (fallocate without
 * FALLOC_FL_KEEP_SIZE). @file may be NULL; it is only used for the
 * suid/sgid stripping check.
 */
static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
				     loff_t f_pos, unsigned int cmd,
				     struct ocfs2_space_resv *sr,
				     int change_size)
{
	int ret;
	s64 llen;
	loff_t size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *di_bh = NULL;
	handle_t *handle;
	unsigned long long max_off = inode->i_sb->s_maxbytes;

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
		return -EROFS;

	mutex_lock(&inode->i_mutex);

	/*
	 * This prevents concurrent writes on other nodes
	 */
	ret = ocfs2_rw_lock(inode, 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_inode_lock(inode, &di_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_rw_unlock;
	}

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
		ret = -EPERM;
		goto out_inode_unlock;
	}

	/* Normalize sr->l_start to an absolute file offset. */
	switch (sr->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		sr->l_start += f_pos;
		break;
	case 2: /*SEEK_END*/
		sr->l_start += i_size_read(inode);
		break;
	default:
		ret = -EINVAL;
		goto out_inode_unlock;
	}
	sr->l_whence = 0;

	/* Last byte of the range (inclusive), for the bounds checks. */
	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;

	if (sr->l_start < 0
	    || sr->l_start > max_off
	    || (sr->l_start + llen) < 0
	    || (sr->l_start + llen) > max_off) {
		ret = -EINVAL;
		goto out_inode_unlock;
	}
	size = sr->l_start + sr->l_len;

	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
		if (sr->l_len <= 0) {
			ret = -EINVAL;
			goto out_inode_unlock;
		}
	}

	/* Space changes by an unprivileged user strip suid/sgid bits. */
	if (file && should_remove_suid(file->f_path.dentry)) {
		ret = __ocfs2_write_remove_suid(inode, di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_inode_unlock;
		}
	}

	/* ip_alloc_sem guards the extent map against concurrent changes. */
	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	switch (cmd) {
	case OCFS2_IOC_RESVSP:
	case OCFS2_IOC_RESVSP64:
		/*
		 * This takes unsigned offsets, but the signed ones we
		 * pass have been checked against overflow above.
		 */
		ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
						       sr->l_len);
		break;
	case OCFS2_IOC_UNRESVSP:
	case OCFS2_IOC_UNRESVSP64:
		ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
					       sr->l_len);
		break;
	default:
		ret = -EINVAL;
	}
	up_write(&OCFS2_I(inode)->ip_alloc_sem);
	if (ret) {
		mlog_errno(ret);
		goto out_inode_unlock;
	}

	/*
	 * We update c/mtime for these changes
	 */
	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_inode_unlock;
	}

	if (change_size && i_size_read(inode) < size)
		i_size_write(inode, size);

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);

out_inode_unlock:
	brelse(di_bh);
	ocfs2_inode_unlock(inode, 1);
out_rw_unlock:
	ocfs2_rw_unlock(inode, 1);

out:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
1951 | 1953 | ||
1952 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | 1954 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, |
1953 | struct ocfs2_space_resv *sr) | 1955 | struct ocfs2_space_resv *sr) |
1954 | { | 1956 | { |
1955 | struct inode *inode = file->f_path.dentry->d_inode; | 1957 | struct inode *inode = file->f_path.dentry->d_inode; |
1956 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1958 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1957 | 1959 | ||
1958 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && | 1960 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && |
1959 | !ocfs2_writes_unwritten_extents(osb)) | 1961 | !ocfs2_writes_unwritten_extents(osb)) |
1960 | return -ENOTTY; | 1962 | return -ENOTTY; |
1961 | else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) && | 1963 | else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) && |
1962 | !ocfs2_sparse_alloc(osb)) | 1964 | !ocfs2_sparse_alloc(osb)) |
1963 | return -ENOTTY; | 1965 | return -ENOTTY; |
1964 | 1966 | ||
1965 | if (!S_ISREG(inode->i_mode)) | 1967 | if (!S_ISREG(inode->i_mode)) |
1966 | return -EINVAL; | 1968 | return -EINVAL; |
1967 | 1969 | ||
1968 | if (!(file->f_mode & FMODE_WRITE)) | 1970 | if (!(file->f_mode & FMODE_WRITE)) |
1969 | return -EBADF; | 1971 | return -EBADF; |
1970 | 1972 | ||
1971 | return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); | 1973 | return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); |
1972 | } | 1974 | } |
1973 | 1975 | ||
1974 | static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, | 1976 | static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, |
1975 | loff_t len) | 1977 | loff_t len) |
1976 | { | 1978 | { |
1977 | struct inode *inode = file->f_path.dentry->d_inode; | 1979 | struct inode *inode = file->f_path.dentry->d_inode; |
1978 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1980 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1979 | struct ocfs2_space_resv sr; | 1981 | struct ocfs2_space_resv sr; |
1980 | int change_size = 1; | 1982 | int change_size = 1; |
1981 | int cmd = OCFS2_IOC_RESVSP64; | 1983 | int cmd = OCFS2_IOC_RESVSP64; |
1982 | 1984 | ||
1983 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 1985 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
1984 | return -EOPNOTSUPP; | 1986 | return -EOPNOTSUPP; |
1985 | if (!ocfs2_writes_unwritten_extents(osb)) | 1987 | if (!ocfs2_writes_unwritten_extents(osb)) |
1986 | return -EOPNOTSUPP; | 1988 | return -EOPNOTSUPP; |
1987 | 1989 | ||
1988 | if (mode & FALLOC_FL_KEEP_SIZE) | 1990 | if (mode & FALLOC_FL_KEEP_SIZE) |
1989 | change_size = 0; | 1991 | change_size = 0; |
1990 | 1992 | ||
1991 | if (mode & FALLOC_FL_PUNCH_HOLE) | 1993 | if (mode & FALLOC_FL_PUNCH_HOLE) |
1992 | cmd = OCFS2_IOC_UNRESVSP64; | 1994 | cmd = OCFS2_IOC_UNRESVSP64; |
1993 | 1995 | ||
1994 | sr.l_whence = 0; | 1996 | sr.l_whence = 0; |
1995 | sr.l_start = (s64)offset; | 1997 | sr.l_start = (s64)offset; |
1996 | sr.l_len = (s64)len; | 1998 | sr.l_len = (s64)len; |
1997 | 1999 | ||
1998 | return __ocfs2_change_file_space(NULL, inode, offset, cmd, &sr, | 2000 | return __ocfs2_change_file_space(NULL, inode, offset, cmd, &sr, |
1999 | change_size); | 2001 | change_size); |
2000 | } | 2002 | } |
2001 | 2003 | ||
2002 | int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, | 2004 | int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, |
2003 | size_t count) | 2005 | size_t count) |
2004 | { | 2006 | { |
2005 | int ret = 0; | 2007 | int ret = 0; |
2006 | unsigned int extent_flags; | 2008 | unsigned int extent_flags; |
2007 | u32 cpos, clusters, extent_len, phys_cpos; | 2009 | u32 cpos, clusters, extent_len, phys_cpos; |
2008 | struct super_block *sb = inode->i_sb; | 2010 | struct super_block *sb = inode->i_sb; |
2009 | 2011 | ||
2010 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) || | 2012 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) || |
2011 | !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) || | 2013 | !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) || |
2012 | OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 2014 | OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
2013 | return 0; | 2015 | return 0; |
2014 | 2016 | ||
2015 | cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; | 2017 | cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; |
2016 | clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; | 2018 | clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; |
2017 | 2019 | ||
2018 | while (clusters) { | 2020 | while (clusters) { |
2019 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, | 2021 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, |
2020 | &extent_flags); | 2022 | &extent_flags); |
2021 | if (ret < 0) { | 2023 | if (ret < 0) { |
2022 | mlog_errno(ret); | 2024 | mlog_errno(ret); |
2023 | goto out; | 2025 | goto out; |
2024 | } | 2026 | } |
2025 | 2027 | ||
2026 | if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) { | 2028 | if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) { |
2027 | ret = 1; | 2029 | ret = 1; |
2028 | break; | 2030 | break; |
2029 | } | 2031 | } |
2030 | 2032 | ||
2031 | if (extent_len > clusters) | 2033 | if (extent_len > clusters) |
2032 | extent_len = clusters; | 2034 | extent_len = clusters; |
2033 | 2035 | ||
2034 | clusters -= extent_len; | 2036 | clusters -= extent_len; |
2035 | cpos += extent_len; | 2037 | cpos += extent_len; |
2036 | } | 2038 | } |
2037 | out: | 2039 | out: |
2038 | return ret; | 2040 | return ret; |
2039 | } | 2041 | } |
2040 | 2042 | ||
2041 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | 2043 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, |
2042 | struct file *file, | 2044 | struct file *file, |
2043 | loff_t pos, size_t count, | 2045 | loff_t pos, size_t count, |
2044 | int *meta_level) | 2046 | int *meta_level) |
2045 | { | 2047 | { |
2046 | int ret; | 2048 | int ret; |
2047 | struct buffer_head *di_bh = NULL; | 2049 | struct buffer_head *di_bh = NULL; |
2048 | u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | 2050 | u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; |
2049 | u32 clusters = | 2051 | u32 clusters = |
2050 | ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos; | 2052 | ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos; |
2051 | 2053 | ||
2052 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | 2054 | ret = ocfs2_inode_lock(inode, &di_bh, 1); |
2053 | if (ret) { | 2055 | if (ret) { |
2054 | mlog_errno(ret); | 2056 | mlog_errno(ret); |
2055 | goto out; | 2057 | goto out; |
2056 | } | 2058 | } |
2057 | 2059 | ||
2058 | *meta_level = 1; | 2060 | *meta_level = 1; |
2059 | 2061 | ||
2060 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); | 2062 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); |
2061 | if (ret) | 2063 | if (ret) |
2062 | mlog_errno(ret); | 2064 | mlog_errno(ret); |
2063 | out: | 2065 | out: |
2064 | brelse(di_bh); | 2066 | brelse(di_bh); |
2065 | return ret; | 2067 | return ret; |
2066 | } | 2068 | } |
2067 | 2069 | ||
2068 | static int ocfs2_prepare_inode_for_write(struct file *file, | 2070 | static int ocfs2_prepare_inode_for_write(struct file *file, |
2069 | loff_t *ppos, | 2071 | loff_t *ppos, |
2070 | size_t count, | 2072 | size_t count, |
2071 | int appending, | 2073 | int appending, |
2072 | int *direct_io, | 2074 | int *direct_io, |
2073 | int *has_refcount) | 2075 | int *has_refcount) |
2074 | { | 2076 | { |
2075 | int ret = 0, meta_level = 0; | 2077 | int ret = 0, meta_level = 0; |
2076 | struct dentry *dentry = file->f_path.dentry; | 2078 | struct dentry *dentry = file->f_path.dentry; |
2077 | struct inode *inode = dentry->d_inode; | 2079 | struct inode *inode = dentry->d_inode; |
2078 | loff_t saved_pos = 0, end; | 2080 | loff_t saved_pos = 0, end; |
2079 | 2081 | ||
2080 | /* | 2082 | /* |
2081 | * We start with a read level meta lock and only jump to an ex | 2083 | * We start with a read level meta lock and only jump to an ex |
2082 | * if we need to make modifications here. | 2084 | * if we need to make modifications here. |
2083 | */ | 2085 | */ |
2084 | for(;;) { | 2086 | for(;;) { |
2085 | ret = ocfs2_inode_lock(inode, NULL, meta_level); | 2087 | ret = ocfs2_inode_lock(inode, NULL, meta_level); |
2086 | if (ret < 0) { | 2088 | if (ret < 0) { |
2087 | meta_level = -1; | 2089 | meta_level = -1; |
2088 | mlog_errno(ret); | 2090 | mlog_errno(ret); |
2089 | goto out; | 2091 | goto out; |
2090 | } | 2092 | } |
2091 | 2093 | ||
2092 | /* Clear suid / sgid if necessary. We do this here | 2094 | /* Clear suid / sgid if necessary. We do this here |
2093 | * instead of later in the write path because | 2095 | * instead of later in the write path because |
2094 | * remove_suid() calls ->setattr without any hint that | 2096 | * remove_suid() calls ->setattr without any hint that |
2095 | * we may have already done our cluster locking. Since | 2097 | * we may have already done our cluster locking. Since |
2096 | * ocfs2_setattr() *must* take cluster locks to | 2098 | * ocfs2_setattr() *must* take cluster locks to |
2097 | * proceeed, this will lead us to recursively lock the | 2099 | * proceeed, this will lead us to recursively lock the |
2098 | * inode. There's also the dinode i_size state which | 2100 | * inode. There's also the dinode i_size state which |
2099 | * can be lost via setattr during extending writes (we | 2101 | * can be lost via setattr during extending writes (we |
2100 | * set inode->i_size at the end of a write. */ | 2102 | * set inode->i_size at the end of a write. */ |
2101 | if (should_remove_suid(dentry)) { | 2103 | if (should_remove_suid(dentry)) { |
2102 | if (meta_level == 0) { | 2104 | if (meta_level == 0) { |
2103 | ocfs2_inode_unlock(inode, meta_level); | 2105 | ocfs2_inode_unlock(inode, meta_level); |
2104 | meta_level = 1; | 2106 | meta_level = 1; |
2105 | continue; | 2107 | continue; |
2106 | } | 2108 | } |
2107 | 2109 | ||
2108 | ret = ocfs2_write_remove_suid(inode); | 2110 | ret = ocfs2_write_remove_suid(inode); |
2109 | if (ret < 0) { | 2111 | if (ret < 0) { |
2110 | mlog_errno(ret); | 2112 | mlog_errno(ret); |
2111 | goto out_unlock; | 2113 | goto out_unlock; |
2112 | } | 2114 | } |
2113 | } | 2115 | } |
2114 | 2116 | ||
2115 | /* work on a copy of ppos until we're sure that we won't have | 2117 | /* work on a copy of ppos until we're sure that we won't have |
2116 | * to recalculate it due to relocking. */ | 2118 | * to recalculate it due to relocking. */ |
2117 | if (appending) | 2119 | if (appending) |
2118 | saved_pos = i_size_read(inode); | 2120 | saved_pos = i_size_read(inode); |
2119 | else | 2121 | else |
2120 | saved_pos = *ppos; | 2122 | saved_pos = *ppos; |
2121 | 2123 | ||
2122 | end = saved_pos + count; | 2124 | end = saved_pos + count; |
2123 | 2125 | ||
2124 | ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); | 2126 | ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); |
2125 | if (ret == 1) { | 2127 | if (ret == 1) { |
2126 | ocfs2_inode_unlock(inode, meta_level); | 2128 | ocfs2_inode_unlock(inode, meta_level); |
2127 | meta_level = -1; | 2129 | meta_level = -1; |
2128 | 2130 | ||
2129 | ret = ocfs2_prepare_inode_for_refcount(inode, | 2131 | ret = ocfs2_prepare_inode_for_refcount(inode, |
2130 | file, | 2132 | file, |
2131 | saved_pos, | 2133 | saved_pos, |
2132 | count, | 2134 | count, |
2133 | &meta_level); | 2135 | &meta_level); |
2134 | if (has_refcount) | 2136 | if (has_refcount) |
2135 | *has_refcount = 1; | 2137 | *has_refcount = 1; |
2136 | if (direct_io) | 2138 | if (direct_io) |
2137 | *direct_io = 0; | 2139 | *direct_io = 0; |
2138 | } | 2140 | } |
2139 | 2141 | ||
2140 | if (ret < 0) { | 2142 | if (ret < 0) { |
2141 | mlog_errno(ret); | 2143 | mlog_errno(ret); |
2142 | goto out_unlock; | 2144 | goto out_unlock; |
2143 | } | 2145 | } |
2144 | 2146 | ||
2145 | /* | 2147 | /* |
2146 | * Skip the O_DIRECT checks if we don't need | 2148 | * Skip the O_DIRECT checks if we don't need |
2147 | * them. | 2149 | * them. |
2148 | */ | 2150 | */ |
2149 | if (!direct_io || !(*direct_io)) | 2151 | if (!direct_io || !(*direct_io)) |
2150 | break; | 2152 | break; |
2151 | 2153 | ||
2152 | /* | 2154 | /* |
2153 | * There's no sane way to do direct writes to an inode | 2155 | * There's no sane way to do direct writes to an inode |
2154 | * with inline data. | 2156 | * with inline data. |
2155 | */ | 2157 | */ |
2156 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 2158 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
2157 | *direct_io = 0; | 2159 | *direct_io = 0; |
2158 | break; | 2160 | break; |
2159 | } | 2161 | } |
2160 | 2162 | ||
2161 | /* | 2163 | /* |
2162 | * Allowing concurrent direct writes means | 2164 | * Allowing concurrent direct writes means |
2163 | * i_size changes wouldn't be synchronized, so | 2165 | * i_size changes wouldn't be synchronized, so |
2164 | * one node could wind up truncating another | 2166 | * one node could wind up truncating another |
2165 | * nodes writes. | 2167 | * nodes writes. |
2166 | */ | 2168 | */ |
2167 | if (end > i_size_read(inode)) { | 2169 | if (end > i_size_read(inode)) { |
2168 | *direct_io = 0; | 2170 | *direct_io = 0; |
2169 | break; | 2171 | break; |
2170 | } | 2172 | } |
2171 | 2173 | ||
2172 | /* | 2174 | /* |
2173 | * We don't fill holes during direct io, so | 2175 | * We don't fill holes during direct io, so |
2174 | * check for them here. If any are found, the | 2176 | * check for them here. If any are found, the |
2175 | * caller will have to retake some cluster | 2177 | * caller will have to retake some cluster |
2176 | * locks and initiate the io as buffered. | 2178 | * locks and initiate the io as buffered. |
2177 | */ | 2179 | */ |
2178 | ret = ocfs2_check_range_for_holes(inode, saved_pos, count); | 2180 | ret = ocfs2_check_range_for_holes(inode, saved_pos, count); |
2179 | if (ret == 1) { | 2181 | if (ret == 1) { |
2180 | *direct_io = 0; | 2182 | *direct_io = 0; |
2181 | ret = 0; | 2183 | ret = 0; |
2182 | } else if (ret < 0) | 2184 | } else if (ret < 0) |
2183 | mlog_errno(ret); | 2185 | mlog_errno(ret); |
2184 | break; | 2186 | break; |
2185 | } | 2187 | } |
2186 | 2188 | ||
2187 | if (appending) | 2189 | if (appending) |
2188 | *ppos = saved_pos; | 2190 | *ppos = saved_pos; |
2189 | 2191 | ||
2190 | out_unlock: | 2192 | out_unlock: |
2191 | trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, | 2193 | trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, |
2192 | saved_pos, appending, count, | 2194 | saved_pos, appending, count, |
2193 | direct_io, has_refcount); | 2195 | direct_io, has_refcount); |
2194 | 2196 | ||
2195 | if (meta_level >= 0) | 2197 | if (meta_level >= 0) |
2196 | ocfs2_inode_unlock(inode, meta_level); | 2198 | ocfs2_inode_unlock(inode, meta_level); |
2197 | 2199 | ||
2198 | out: | 2200 | out: |
2199 | return ret; | 2201 | return ret; |
2200 | } | 2202 | } |
2201 | 2203 | ||
2202 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | 2204 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, |
2203 | const struct iovec *iov, | 2205 | const struct iovec *iov, |
2204 | unsigned long nr_segs, | 2206 | unsigned long nr_segs, |
2205 | loff_t pos) | 2207 | loff_t pos) |
2206 | { | 2208 | { |
2207 | int ret, direct_io, appending, rw_level, have_alloc_sem = 0; | 2209 | int ret, direct_io, appending, rw_level, have_alloc_sem = 0; |
2208 | int can_do_direct, has_refcount = 0; | 2210 | int can_do_direct, has_refcount = 0; |
2209 | ssize_t written = 0; | 2211 | ssize_t written = 0; |
2210 | size_t ocount; /* original count */ | 2212 | size_t ocount; /* original count */ |
2211 | size_t count; /* after file limit checks */ | 2213 | size_t count; /* after file limit checks */ |
2212 | loff_t old_size, *ppos = &iocb->ki_pos; | 2214 | loff_t old_size, *ppos = &iocb->ki_pos; |
2213 | u32 old_clusters; | 2215 | u32 old_clusters; |
2214 | struct file *file = iocb->ki_filp; | 2216 | struct file *file = iocb->ki_filp; |
2215 | struct inode *inode = file->f_path.dentry->d_inode; | 2217 | struct inode *inode = file->f_path.dentry->d_inode; |
2216 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2218 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2217 | int full_coherency = !(osb->s_mount_opt & | 2219 | int full_coherency = !(osb->s_mount_opt & |
2218 | OCFS2_MOUNT_COHERENCY_BUFFERED); | 2220 | OCFS2_MOUNT_COHERENCY_BUFFERED); |
2219 | 2221 | ||
2220 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, | 2222 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, |
2221 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2223 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2222 | file->f_path.dentry->d_name.len, | 2224 | file->f_path.dentry->d_name.len, |
2223 | file->f_path.dentry->d_name.name, | 2225 | file->f_path.dentry->d_name.name, |
2224 | (unsigned int)nr_segs); | 2226 | (unsigned int)nr_segs); |
2225 | 2227 | ||
2226 | if (iocb->ki_left == 0) | 2228 | if (iocb->ki_left == 0) |
2227 | return 0; | 2229 | return 0; |
2228 | 2230 | ||
2229 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 2231 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
2230 | 2232 | ||
2231 | appending = file->f_flags & O_APPEND ? 1 : 0; | 2233 | appending = file->f_flags & O_APPEND ? 1 : 0; |
2232 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; | 2234 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; |
2233 | 2235 | ||
2234 | mutex_lock(&inode->i_mutex); | 2236 | mutex_lock(&inode->i_mutex); |
2235 | 2237 | ||
2236 | ocfs2_iocb_clear_sem_locked(iocb); | 2238 | ocfs2_iocb_clear_sem_locked(iocb); |
2237 | 2239 | ||
2238 | relock: | 2240 | relock: |
2239 | /* to match setattr's i_mutex -> rw_lock ordering */ | 2241 | /* to match setattr's i_mutex -> rw_lock ordering */ |
2240 | if (direct_io) { | 2242 | if (direct_io) { |
2241 | atomic_inc(&inode->i_dio_count); | 2243 | atomic_inc(&inode->i_dio_count); |
2242 | have_alloc_sem = 1; | 2244 | have_alloc_sem = 1; |
2243 | /* communicate with ocfs2_dio_end_io */ | 2245 | /* communicate with ocfs2_dio_end_io */ |
2244 | ocfs2_iocb_set_sem_locked(iocb); | 2246 | ocfs2_iocb_set_sem_locked(iocb); |
2245 | } | 2247 | } |
2246 | 2248 | ||
2247 | /* | 2249 | /* |
2248 | * Concurrent O_DIRECT writes are allowed with | 2250 | * Concurrent O_DIRECT writes are allowed with |
2249 | * mount_option "coherency=buffered". | 2251 | * mount_option "coherency=buffered". |
2250 | */ | 2252 | */ |
2251 | rw_level = (!direct_io || full_coherency); | 2253 | rw_level = (!direct_io || full_coherency); |
2252 | 2254 | ||
2253 | ret = ocfs2_rw_lock(inode, rw_level); | 2255 | ret = ocfs2_rw_lock(inode, rw_level); |
2254 | if (ret < 0) { | 2256 | if (ret < 0) { |
2255 | mlog_errno(ret); | 2257 | mlog_errno(ret); |
2256 | goto out_sems; | 2258 | goto out_sems; |
2257 | } | 2259 | } |
2258 | 2260 | ||
2259 | /* | 2261 | /* |
2260 | * O_DIRECT writes with "coherency=full" need to take EX cluster | 2262 | * O_DIRECT writes with "coherency=full" need to take EX cluster |
2261 | * inode_lock to guarantee coherency. | 2263 | * inode_lock to guarantee coherency. |
2262 | */ | 2264 | */ |
2263 | if (direct_io && full_coherency) { | 2265 | if (direct_io && full_coherency) { |
2264 | /* | 2266 | /* |
2265 | * We need to take and drop the inode lock to force | 2267 | * We need to take and drop the inode lock to force |
2266 | * other nodes to drop their caches. Buffered I/O | 2268 | * other nodes to drop their caches. Buffered I/O |
2267 | * already does this in write_begin(). | 2269 | * already does this in write_begin(). |
2268 | */ | 2270 | */ |
2269 | ret = ocfs2_inode_lock(inode, NULL, 1); | 2271 | ret = ocfs2_inode_lock(inode, NULL, 1); |
2270 | if (ret < 0) { | 2272 | if (ret < 0) { |
2271 | mlog_errno(ret); | 2273 | mlog_errno(ret); |
2272 | goto out_sems; | 2274 | goto out_sems; |
2273 | } | 2275 | } |
2274 | 2276 | ||
2275 | ocfs2_inode_unlock(inode, 1); | 2277 | ocfs2_inode_unlock(inode, 1); |
2276 | } | 2278 | } |
2277 | 2279 | ||
2278 | can_do_direct = direct_io; | 2280 | can_do_direct = direct_io; |
2279 | ret = ocfs2_prepare_inode_for_write(file, ppos, | 2281 | ret = ocfs2_prepare_inode_for_write(file, ppos, |
2280 | iocb->ki_left, appending, | 2282 | iocb->ki_left, appending, |
2281 | &can_do_direct, &has_refcount); | 2283 | &can_do_direct, &has_refcount); |
2282 | if (ret < 0) { | 2284 | if (ret < 0) { |
2283 | mlog_errno(ret); | 2285 | mlog_errno(ret); |
2284 | goto out; | 2286 | goto out; |
2285 | } | 2287 | } |
2286 | 2288 | ||
2287 | /* | 2289 | /* |
2288 | * We can't complete the direct I/O as requested, fall back to | 2290 | * We can't complete the direct I/O as requested, fall back to |
2289 | * buffered I/O. | 2291 | * buffered I/O. |
2290 | */ | 2292 | */ |
2291 | if (direct_io && !can_do_direct) { | 2293 | if (direct_io && !can_do_direct) { |
2292 | ocfs2_rw_unlock(inode, rw_level); | 2294 | ocfs2_rw_unlock(inode, rw_level); |
2293 | inode_dio_done(inode); | 2295 | inode_dio_done(inode); |
2294 | 2296 | ||
2295 | have_alloc_sem = 0; | 2297 | have_alloc_sem = 0; |
2296 | rw_level = -1; | 2298 | rw_level = -1; |
2297 | 2299 | ||
2298 | direct_io = 0; | 2300 | direct_io = 0; |
2299 | goto relock; | 2301 | goto relock; |
2300 | } | 2302 | } |
2301 | 2303 | ||
2302 | /* | 2304 | /* |
2303 | * To later detect whether a journal commit for sync writes is | 2305 | * To later detect whether a journal commit for sync writes is |
2304 | * necessary, we sample i_size, and cluster count here. | 2306 | * necessary, we sample i_size, and cluster count here. |
2305 | */ | 2307 | */ |
2306 | old_size = i_size_read(inode); | 2308 | old_size = i_size_read(inode); |
2307 | old_clusters = OCFS2_I(inode)->ip_clusters; | 2309 | old_clusters = OCFS2_I(inode)->ip_clusters; |
2308 | 2310 | ||
2309 | /* communicate with ocfs2_dio_end_io */ | 2311 | /* communicate with ocfs2_dio_end_io */ |
2310 | ocfs2_iocb_set_rw_locked(iocb, rw_level); | 2312 | ocfs2_iocb_set_rw_locked(iocb, rw_level); |
2311 | 2313 | ||
2312 | ret = generic_segment_checks(iov, &nr_segs, &ocount, | 2314 | ret = generic_segment_checks(iov, &nr_segs, &ocount, |
2313 | VERIFY_READ); | 2315 | VERIFY_READ); |
2314 | if (ret) | 2316 | if (ret) |
2315 | goto out_dio; | 2317 | goto out_dio; |
2316 | 2318 | ||
2317 | count = ocount; | 2319 | count = ocount; |
2318 | ret = generic_write_checks(file, ppos, &count, | 2320 | ret = generic_write_checks(file, ppos, &count, |
2319 | S_ISBLK(inode->i_mode)); | 2321 | S_ISBLK(inode->i_mode)); |
2320 | if (ret) | 2322 | if (ret) |
2321 | goto out_dio; | 2323 | goto out_dio; |
2322 | 2324 | ||
2323 | if (direct_io) { | 2325 | if (direct_io) { |
2324 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2326 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
2325 | ppos, count, ocount); | 2327 | ppos, count, ocount); |
2326 | if (written < 0) { | 2328 | if (written < 0) { |
2327 | ret = written; | 2329 | ret = written; |
2328 | goto out_dio; | 2330 | goto out_dio; |
2329 | } | 2331 | } |
2330 | } else { | 2332 | } else { |
2331 | current->backing_dev_info = file->f_mapping->backing_dev_info; | 2333 | current->backing_dev_info = file->f_mapping->backing_dev_info; |
2332 | written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, | 2334 | written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, |
2333 | ppos, count, 0); | 2335 | ppos, count, 0); |
2334 | current->backing_dev_info = NULL; | 2336 | current->backing_dev_info = NULL; |
2335 | } | 2337 | } |
2336 | 2338 | ||
2337 | out_dio: | 2339 | out_dio: |
2338 | /* buffered aio wouldn't have proper lock coverage today */ | 2340 | /* buffered aio wouldn't have proper lock coverage today */ |
2339 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2341 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
2340 | 2342 | ||
2341 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || | 2343 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
2342 | ((file->f_flags & O_DIRECT) && !direct_io)) { | 2344 | ((file->f_flags & O_DIRECT) && !direct_io)) { |
2343 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2345 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
2344 | pos + count - 1); | 2346 | pos + count - 1); |
2345 | if (ret < 0) | 2347 | if (ret < 0) |
2346 | written = ret; | 2348 | written = ret; |
2347 | 2349 | ||
2348 | if (!ret && ((old_size != i_size_read(inode)) || | 2350 | if (!ret && ((old_size != i_size_read(inode)) || |
2349 | (old_clusters != OCFS2_I(inode)->ip_clusters) || | 2351 | (old_clusters != OCFS2_I(inode)->ip_clusters) || |
2350 | has_refcount)) { | 2352 | has_refcount)) { |
2351 | ret = jbd2_journal_force_commit(osb->journal->j_journal); | 2353 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
2352 | if (ret < 0) | 2354 | if (ret < 0) |
2353 | written = ret; | 2355 | written = ret; |
2354 | } | 2356 | } |
2355 | 2357 | ||
2356 | if (!ret) | 2358 | if (!ret) |
2357 | ret = filemap_fdatawait_range(file->f_mapping, pos, | 2359 | ret = filemap_fdatawait_range(file->f_mapping, pos, |
2358 | pos + count - 1); | 2360 | pos + count - 1); |
2359 | } | 2361 | } |
2360 | 2362 | ||
2361 | /* | 2363 | /* |
2362 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io | 2364 | * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io |
2363 | * function pointer which is called when o_direct io completes so that | 2365 | * function pointer which is called when o_direct io completes so that |
2364 | * it can unlock our rw lock. | 2366 | * it can unlock our rw lock. |
2365 | * Unfortunately there are error cases which call end_io and others | 2367 | * Unfortunately there are error cases which call end_io and others |
2366 | * that don't. so we don't have to unlock the rw_lock if either an | 2368 | * that don't. so we don't have to unlock the rw_lock if either an |
2367 | * async dio is going to do it in the future or an end_io after an | 2369 | * async dio is going to do it in the future or an end_io after an |
2368 | * error has already done it. | 2370 | * error has already done it. |
2369 | */ | 2371 | */ |
2370 | if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { | 2372 | if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { |
2371 | rw_level = -1; | 2373 | rw_level = -1; |
2372 | have_alloc_sem = 0; | 2374 | have_alloc_sem = 0; |
2373 | } | 2375 | } |
2374 | 2376 | ||
2375 | out: | 2377 | out: |
2376 | if (rw_level != -1) | 2378 | if (rw_level != -1) |
2377 | ocfs2_rw_unlock(inode, rw_level); | 2379 | ocfs2_rw_unlock(inode, rw_level); |
2378 | 2380 | ||
2379 | out_sems: | 2381 | out_sems: |
2380 | if (have_alloc_sem) { | 2382 | if (have_alloc_sem) { |
2381 | inode_dio_done(inode); | 2383 | inode_dio_done(inode); |
2382 | ocfs2_iocb_clear_sem_locked(iocb); | 2384 | ocfs2_iocb_clear_sem_locked(iocb); |
2383 | } | 2385 | } |
2384 | 2386 | ||
2385 | mutex_unlock(&inode->i_mutex); | 2387 | mutex_unlock(&inode->i_mutex); |
2386 | 2388 | ||
2387 | if (written) | 2389 | if (written) |
2388 | ret = written; | 2390 | ret = written; |
2389 | return ret; | 2391 | return ret; |
2390 | } | 2392 | } |
2391 | 2393 | ||
2392 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | 2394 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, |
2393 | struct file *out, | 2395 | struct file *out, |
2394 | struct splice_desc *sd) | 2396 | struct splice_desc *sd) |
2395 | { | 2397 | { |
2396 | int ret; | 2398 | int ret; |
2397 | 2399 | ||
2398 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, | 2400 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, |
2399 | sd->total_len, 0, NULL, NULL); | 2401 | sd->total_len, 0, NULL, NULL); |
2400 | if (ret < 0) { | 2402 | if (ret < 0) { |
2401 | mlog_errno(ret); | 2403 | mlog_errno(ret); |
2402 | return ret; | 2404 | return ret; |
2403 | } | 2405 | } |
2404 | 2406 | ||
2405 | return splice_from_pipe_feed(pipe, sd, pipe_to_file); | 2407 | return splice_from_pipe_feed(pipe, sd, pipe_to_file); |
2406 | } | 2408 | } |
2407 | 2409 | ||
2408 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | 2410 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, |
2409 | struct file *out, | 2411 | struct file *out, |
2410 | loff_t *ppos, | 2412 | loff_t *ppos, |
2411 | size_t len, | 2413 | size_t len, |
2412 | unsigned int flags) | 2414 | unsigned int flags) |
2413 | { | 2415 | { |
2414 | int ret; | 2416 | int ret; |
2415 | struct address_space *mapping = out->f_mapping; | 2417 | struct address_space *mapping = out->f_mapping; |
2416 | struct inode *inode = mapping->host; | 2418 | struct inode *inode = mapping->host; |
2417 | struct splice_desc sd = { | 2419 | struct splice_desc sd = { |
2418 | .total_len = len, | 2420 | .total_len = len, |
2419 | .flags = flags, | 2421 | .flags = flags, |
2420 | .pos = *ppos, | 2422 | .pos = *ppos, |
2421 | .u.file = out, | 2423 | .u.file = out, |
2422 | }; | 2424 | }; |
2423 | 2425 | ||
2424 | 2426 | ||
2425 | trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry, | 2427 | trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry, |
2426 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2428 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2427 | out->f_path.dentry->d_name.len, | 2429 | out->f_path.dentry->d_name.len, |
2428 | out->f_path.dentry->d_name.name, len); | 2430 | out->f_path.dentry->d_name.name, len); |
2429 | 2431 | ||
2430 | if (pipe->inode) | 2432 | if (pipe->inode) |
2431 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | 2433 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); |
2432 | 2434 | ||
2433 | splice_from_pipe_begin(&sd); | 2435 | splice_from_pipe_begin(&sd); |
2434 | do { | 2436 | do { |
2435 | ret = splice_from_pipe_next(pipe, &sd); | 2437 | ret = splice_from_pipe_next(pipe, &sd); |
2436 | if (ret <= 0) | 2438 | if (ret <= 0) |
2437 | break; | 2439 | break; |
2438 | 2440 | ||
2439 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | 2441 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
2440 | ret = ocfs2_rw_lock(inode, 1); | 2442 | ret = ocfs2_rw_lock(inode, 1); |
2441 | if (ret < 0) | 2443 | if (ret < 0) |
2442 | mlog_errno(ret); | 2444 | mlog_errno(ret); |
2443 | else { | 2445 | else { |
2444 | ret = ocfs2_splice_to_file(pipe, out, &sd); | 2446 | ret = ocfs2_splice_to_file(pipe, out, &sd); |
2445 | ocfs2_rw_unlock(inode, 1); | 2447 | ocfs2_rw_unlock(inode, 1); |
2446 | } | 2448 | } |
2447 | mutex_unlock(&inode->i_mutex); | 2449 | mutex_unlock(&inode->i_mutex); |
2448 | } while (ret > 0); | 2450 | } while (ret > 0); |
2449 | splice_from_pipe_end(pipe, &sd); | 2451 | splice_from_pipe_end(pipe, &sd); |
2450 | 2452 | ||
2451 | if (pipe->inode) | 2453 | if (pipe->inode) |
2452 | mutex_unlock(&pipe->inode->i_mutex); | 2454 | mutex_unlock(&pipe->inode->i_mutex); |
2453 | 2455 | ||
2454 | if (sd.num_spliced) | 2456 | if (sd.num_spliced) |
2455 | ret = sd.num_spliced; | 2457 | ret = sd.num_spliced; |
2456 | 2458 | ||
2457 | if (ret > 0) { | 2459 | if (ret > 0) { |
2458 | unsigned long nr_pages; | 2460 | unsigned long nr_pages; |
2459 | int err; | 2461 | int err; |
2460 | 2462 | ||
2461 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 2463 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
2462 | 2464 | ||
2463 | err = generic_write_sync(out, *ppos, ret); | 2465 | err = generic_write_sync(out, *ppos, ret); |
2464 | if (err) | 2466 | if (err) |
2465 | ret = err; | 2467 | ret = err; |
2466 | else | 2468 | else |
2467 | *ppos += ret; | 2469 | *ppos += ret; |
2468 | 2470 | ||
2469 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 2471 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); |
2470 | } | 2472 | } |
2471 | 2473 | ||
2472 | return ret; | 2474 | return ret; |
2473 | } | 2475 | } |
2474 | 2476 | ||
2475 | static ssize_t ocfs2_file_splice_read(struct file *in, | 2477 | static ssize_t ocfs2_file_splice_read(struct file *in, |
2476 | loff_t *ppos, | 2478 | loff_t *ppos, |
2477 | struct pipe_inode_info *pipe, | 2479 | struct pipe_inode_info *pipe, |
2478 | size_t len, | 2480 | size_t len, |
2479 | unsigned int flags) | 2481 | unsigned int flags) |
2480 | { | 2482 | { |
2481 | int ret = 0, lock_level = 0; | 2483 | int ret = 0, lock_level = 0; |
2482 | struct inode *inode = in->f_path.dentry->d_inode; | 2484 | struct inode *inode = in->f_path.dentry->d_inode; |
2483 | 2485 | ||
2484 | trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, | 2486 | trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, |
2485 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2487 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2486 | in->f_path.dentry->d_name.len, | 2488 | in->f_path.dentry->d_name.len, |
2487 | in->f_path.dentry->d_name.name, len); | 2489 | in->f_path.dentry->d_name.name, len); |
2488 | 2490 | ||
2489 | /* | 2491 | /* |
2490 | * See the comment in ocfs2_file_aio_read() | 2492 | * See the comment in ocfs2_file_aio_read() |
2491 | */ | 2493 | */ |
2492 | ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); | 2494 | ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); |
2493 | if (ret < 0) { | 2495 | if (ret < 0) { |
2494 | mlog_errno(ret); | 2496 | mlog_errno(ret); |
2495 | goto bail; | 2497 | goto bail; |
2496 | } | 2498 | } |
2497 | ocfs2_inode_unlock(inode, lock_level); | 2499 | ocfs2_inode_unlock(inode, lock_level); |
2498 | 2500 | ||
2499 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | 2501 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); |
2500 | 2502 | ||
2501 | bail: | 2503 | bail: |
2502 | return ret; | 2504 | return ret; |
2503 | } | 2505 | } |
2504 | 2506 | ||
2505 | static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | 2507 | static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, |
2506 | const struct iovec *iov, | 2508 | const struct iovec *iov, |
2507 | unsigned long nr_segs, | 2509 | unsigned long nr_segs, |
2508 | loff_t pos) | 2510 | loff_t pos) |
2509 | { | 2511 | { |
2510 | int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; | 2512 | int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; |
2511 | struct file *filp = iocb->ki_filp; | 2513 | struct file *filp = iocb->ki_filp; |
2512 | struct inode *inode = filp->f_path.dentry->d_inode; | 2514 | struct inode *inode = filp->f_path.dentry->d_inode; |
2513 | 2515 | ||
2514 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, | 2516 | trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, |
2515 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2517 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2516 | filp->f_path.dentry->d_name.len, | 2518 | filp->f_path.dentry->d_name.len, |
2517 | filp->f_path.dentry->d_name.name, nr_segs); | 2519 | filp->f_path.dentry->d_name.name, nr_segs); |
2518 | 2520 | ||
2519 | 2521 | ||
2520 | if (!inode) { | 2522 | if (!inode) { |
2521 | ret = -EINVAL; | 2523 | ret = -EINVAL; |
2522 | mlog_errno(ret); | 2524 | mlog_errno(ret); |
2523 | goto bail; | 2525 | goto bail; |
2524 | } | 2526 | } |
2525 | 2527 | ||
2526 | ocfs2_iocb_clear_sem_locked(iocb); | 2528 | ocfs2_iocb_clear_sem_locked(iocb); |
2527 | 2529 | ||
2528 | /* | 2530 | /* |
2529 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads | 2531 | * buffered reads protect themselves in ->readpage(). O_DIRECT reads |
2530 | * need locks to protect pending reads from racing with truncate. | 2532 | * need locks to protect pending reads from racing with truncate. |
2531 | */ | 2533 | */ |
2532 | if (filp->f_flags & O_DIRECT) { | 2534 | if (filp->f_flags & O_DIRECT) { |
2533 | have_alloc_sem = 1; | 2535 | have_alloc_sem = 1; |
2534 | atomic_inc(&inode->i_dio_count); | 2536 | atomic_inc(&inode->i_dio_count); |
2535 | ocfs2_iocb_set_sem_locked(iocb); | 2537 | ocfs2_iocb_set_sem_locked(iocb); |
2536 | 2538 | ||
2537 | ret = ocfs2_rw_lock(inode, 0); | 2539 | ret = ocfs2_rw_lock(inode, 0); |
2538 | if (ret < 0) { | 2540 | if (ret < 0) { |
2539 | mlog_errno(ret); | 2541 | mlog_errno(ret); |
2540 | goto bail; | 2542 | goto bail; |
2541 | } | 2543 | } |
2542 | rw_level = 0; | 2544 | rw_level = 0; |
2543 | /* communicate with ocfs2_dio_end_io */ | 2545 | /* communicate with ocfs2_dio_end_io */ |
2544 | ocfs2_iocb_set_rw_locked(iocb, rw_level); | 2546 | ocfs2_iocb_set_rw_locked(iocb, rw_level); |
2545 | } | 2547 | } |
2546 | 2548 | ||
2547 | /* | 2549 | /* |
2548 | * We're fine letting folks race truncates and extending | 2550 | * We're fine letting folks race truncates and extending |
2549 | * writes with read across the cluster, just like they can | 2551 | * writes with read across the cluster, just like they can |
2550 | * locally. Hence no rw_lock during read. | 2552 | * locally. Hence no rw_lock during read. |
2551 | * | 2553 | * |
2552 | * Take and drop the meta data lock to update inode fields | 2554 | * Take and drop the meta data lock to update inode fields |
2553 | * like i_size. This allows the checks down below | 2555 | * like i_size. This allows the checks down below |
2554 | * generic_file_aio_read() a chance of actually working. | 2556 | * generic_file_aio_read() a chance of actually working. |
2555 | */ | 2557 | */ |
2556 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); | 2558 | ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); |
2557 | if (ret < 0) { | 2559 | if (ret < 0) { |
2558 | mlog_errno(ret); | 2560 | mlog_errno(ret); |
2559 | goto bail; | 2561 | goto bail; |
2560 | } | 2562 | } |
2561 | ocfs2_inode_unlock(inode, lock_level); | 2563 | ocfs2_inode_unlock(inode, lock_level); |
2562 | 2564 | ||
2563 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); | 2565 | ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); |
2564 | trace_generic_file_aio_read_ret(ret); | 2566 | trace_generic_file_aio_read_ret(ret); |
2565 | 2567 | ||
2566 | /* buffered aio wouldn't have proper lock coverage today */ | 2568 | /* buffered aio wouldn't have proper lock coverage today */ |
2567 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); | 2569 | BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); |
2568 | 2570 | ||
2569 | /* see ocfs2_file_aio_write */ | 2571 | /* see ocfs2_file_aio_write */ |
2570 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { | 2572 | if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { |
2571 | rw_level = -1; | 2573 | rw_level = -1; |
2572 | have_alloc_sem = 0; | 2574 | have_alloc_sem = 0; |
2573 | } | 2575 | } |
2574 | 2576 | ||
2575 | bail: | 2577 | bail: |
2576 | if (have_alloc_sem) { | 2578 | if (have_alloc_sem) { |
2577 | inode_dio_done(inode); | 2579 | inode_dio_done(inode); |
2578 | ocfs2_iocb_clear_sem_locked(iocb); | 2580 | ocfs2_iocb_clear_sem_locked(iocb); |
2579 | } | 2581 | } |
2580 | if (rw_level != -1) | 2582 | if (rw_level != -1) |
2581 | ocfs2_rw_unlock(inode, rw_level); | 2583 | ocfs2_rw_unlock(inode, rw_level); |
2582 | 2584 | ||
2583 | return ret; | 2585 | return ret; |
2584 | } | 2586 | } |
2585 | 2587 | ||
2586 | const struct inode_operations ocfs2_file_iops = { | 2588 | const struct inode_operations ocfs2_file_iops = { |
2587 | .setattr = ocfs2_setattr, | 2589 | .setattr = ocfs2_setattr, |
2588 | .getattr = ocfs2_getattr, | 2590 | .getattr = ocfs2_getattr, |
2589 | .permission = ocfs2_permission, | 2591 | .permission = ocfs2_permission, |
2590 | .setxattr = generic_setxattr, | 2592 | .setxattr = generic_setxattr, |
2591 | .getxattr = generic_getxattr, | 2593 | .getxattr = generic_getxattr, |
2592 | .listxattr = ocfs2_listxattr, | 2594 | .listxattr = ocfs2_listxattr, |
2593 | .removexattr = generic_removexattr, | 2595 | .removexattr = generic_removexattr, |
2594 | .fiemap = ocfs2_fiemap, | 2596 | .fiemap = ocfs2_fiemap, |
2595 | .check_acl = ocfs2_check_acl, | 2597 | .check_acl = ocfs2_check_acl, |
2596 | }; | 2598 | }; |
2597 | 2599 | ||
2598 | const struct inode_operations ocfs2_special_file_iops = { | 2600 | const struct inode_operations ocfs2_special_file_iops = { |
2599 | .setattr = ocfs2_setattr, | 2601 | .setattr = ocfs2_setattr, |
2600 | .getattr = ocfs2_getattr, | 2602 | .getattr = ocfs2_getattr, |
2601 | .permission = ocfs2_permission, | 2603 | .permission = ocfs2_permission, |
2602 | .check_acl = ocfs2_check_acl, | 2604 | .check_acl = ocfs2_check_acl, |
2603 | }; | 2605 | }; |
2604 | 2606 | ||
2605 | /* | 2607 | /* |
2606 | * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with | 2608 | * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with |
2607 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! | 2609 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! |
2608 | */ | 2610 | */ |
2609 | const struct file_operations ocfs2_fops = { | 2611 | const struct file_operations ocfs2_fops = { |
2610 | .llseek = generic_file_llseek, | 2612 | .llseek = generic_file_llseek, |
2611 | .read = do_sync_read, | 2613 | .read = do_sync_read, |
2612 | .write = do_sync_write, | 2614 | .write = do_sync_write, |
2613 | .mmap = ocfs2_mmap, | 2615 | .mmap = ocfs2_mmap, |
2614 | .fsync = ocfs2_sync_file, | 2616 | .fsync = ocfs2_sync_file, |
2615 | .release = ocfs2_file_release, | 2617 | .release = ocfs2_file_release, |
2616 | .open = ocfs2_file_open, | 2618 | .open = ocfs2_file_open, |
2617 | .aio_read = ocfs2_file_aio_read, | 2619 | .aio_read = ocfs2_file_aio_read, |
2618 | .aio_write = ocfs2_file_aio_write, | 2620 | .aio_write = ocfs2_file_aio_write, |
2619 | .unlocked_ioctl = ocfs2_ioctl, | 2621 | .unlocked_ioctl = ocfs2_ioctl, |
2620 | #ifdef CONFIG_COMPAT | 2622 | #ifdef CONFIG_COMPAT |
2621 | .compat_ioctl = ocfs2_compat_ioctl, | 2623 | .compat_ioctl = ocfs2_compat_ioctl, |
2622 | #endif | 2624 | #endif |
2623 | .lock = ocfs2_lock, | 2625 | .lock = ocfs2_lock, |
2624 | .flock = ocfs2_flock, | 2626 | .flock = ocfs2_flock, |
2625 | .splice_read = ocfs2_file_splice_read, | 2627 | .splice_read = ocfs2_file_splice_read, |
2626 | .splice_write = ocfs2_file_splice_write, | 2628 | .splice_write = ocfs2_file_splice_write, |
2627 | .fallocate = ocfs2_fallocate, | 2629 | .fallocate = ocfs2_fallocate, |
2628 | }; | 2630 | }; |
2629 | 2631 | ||
2630 | const struct file_operations ocfs2_dops = { | 2632 | const struct file_operations ocfs2_dops = { |
2631 | .llseek = generic_file_llseek, | 2633 | .llseek = generic_file_llseek, |
2632 | .read = generic_read_dir, | 2634 | .read = generic_read_dir, |
2633 | .readdir = ocfs2_readdir, | 2635 | .readdir = ocfs2_readdir, |
2634 | .fsync = ocfs2_sync_file, | 2636 | .fsync = ocfs2_sync_file, |
2635 | .release = ocfs2_dir_release, | 2637 | .release = ocfs2_dir_release, |
2636 | .open = ocfs2_dir_open, | 2638 | .open = ocfs2_dir_open, |
2637 | .unlocked_ioctl = ocfs2_ioctl, | 2639 | .unlocked_ioctl = ocfs2_ioctl, |
2638 | #ifdef CONFIG_COMPAT | 2640 | #ifdef CONFIG_COMPAT |
2639 | .compat_ioctl = ocfs2_compat_ioctl, | 2641 | .compat_ioctl = ocfs2_compat_ioctl, |
2640 | #endif | 2642 | #endif |
2641 | .lock = ocfs2_lock, | 2643 | .lock = ocfs2_lock, |
2642 | .flock = ocfs2_flock, | 2644 | .flock = ocfs2_flock, |
2643 | }; | 2645 | }; |
2644 | 2646 | ||
2645 | /* | 2647 | /* |
2646 | * POSIX-lockless variants of our file_operations. | 2648 | * POSIX-lockless variants of our file_operations. |
2647 | * | 2649 | * |
2648 | * These will be used if the underlying cluster stack does not support | 2650 | * These will be used if the underlying cluster stack does not support |
2649 | * posix file locking, if the user passes the "localflocks" mount | 2651 | * posix file locking, if the user passes the "localflocks" mount |
2650 | * option, or if we have a local-only fs. | 2652 | * option, or if we have a local-only fs. |
2651 | * | 2653 | * |
2652 | * ocfs2_flock is in here because all stacks handle UNIX file locks, | 2654 | * ocfs2_flock is in here because all stacks handle UNIX file locks, |
2653 | * so we still want it in the case of no stack support for | 2655 | * so we still want it in the case of no stack support for |
2654 | * plocks. Internally, it will do the right thing when asked to ignore | 2656 | * plocks. Internally, it will do the right thing when asked to ignore |
2655 | * the cluster. | 2657 | * the cluster. |
2656 | */ | 2658 | */ |
2657 | const struct file_operations ocfs2_fops_no_plocks = { | 2659 | const struct file_operations ocfs2_fops_no_plocks = { |
2658 | .llseek = generic_file_llseek, | 2660 | .llseek = generic_file_llseek, |
2659 | .read = do_sync_read, | 2661 | .read = do_sync_read, |
2660 | .write = do_sync_write, | 2662 | .write = do_sync_write, |
2661 | .mmap = ocfs2_mmap, | 2663 | .mmap = ocfs2_mmap, |
2662 | .fsync = ocfs2_sync_file, | 2664 | .fsync = ocfs2_sync_file, |
2663 | .release = ocfs2_file_release, | 2665 | .release = ocfs2_file_release, |
2664 | .open = ocfs2_file_open, | 2666 | .open = ocfs2_file_open, |
2665 | .aio_read = ocfs2_file_aio_read, | 2667 | .aio_read = ocfs2_file_aio_read, |
2666 | .aio_write = ocfs2_file_aio_write, | 2668 | .aio_write = ocfs2_file_aio_write, |
2667 | .unlocked_ioctl = ocfs2_ioctl, | 2669 | .unlocked_ioctl = ocfs2_ioctl, |
2668 | #ifdef CONFIG_COMPAT | 2670 | #ifdef CONFIG_COMPAT |
2669 | .compat_ioctl = ocfs2_compat_ioctl, | 2671 | .compat_ioctl = ocfs2_compat_ioctl, |
2670 | #endif | 2672 | #endif |
2671 | .flock = ocfs2_flock, | 2673 | .flock = ocfs2_flock, |
2672 | .splice_read = ocfs2_file_splice_read, | 2674 | .splice_read = ocfs2_file_splice_read, |
2673 | .splice_write = ocfs2_file_splice_write, | 2675 | .splice_write = ocfs2_file_splice_write, |
2674 | .fallocate = ocfs2_fallocate, | 2676 | .fallocate = ocfs2_fallocate, |
2675 | }; | 2677 | }; |
2676 | 2678 | ||
2677 | const struct file_operations ocfs2_dops_no_plocks = { | 2679 | const struct file_operations ocfs2_dops_no_plocks = { |
2678 | .llseek = generic_file_llseek, | 2680 | .llseek = generic_file_llseek, |
2679 | .read = generic_read_dir, | 2681 | .read = generic_read_dir, |
2680 | .readdir = ocfs2_readdir, | 2682 | .readdir = ocfs2_readdir, |
2681 | .fsync = ocfs2_sync_file, | 2683 | .fsync = ocfs2_sync_file, |
2682 | .release = ocfs2_dir_release, | 2684 | .release = ocfs2_dir_release, |
2683 | .open = ocfs2_dir_open, | 2685 | .open = ocfs2_dir_open, |
2684 | .unlocked_ioctl = ocfs2_ioctl, | 2686 | .unlocked_ioctl = ocfs2_ioctl, |
2685 | #ifdef CONFIG_COMPAT | 2687 | #ifdef CONFIG_COMPAT |
2686 | .compat_ioctl = ocfs2_compat_ioctl, | 2688 | .compat_ioctl = ocfs2_compat_ioctl, |
2687 | #endif | 2689 | #endif |
2688 | .flock = ocfs2_flock, | 2690 | .flock = ocfs2_flock, |
2689 | }; | 2691 | }; |
2690 | 2692 |
fs/reiserfs/inode.c
1 | /* | 1 | /* |
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/time.h> | 5 | #include <linux/time.h> |
6 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
7 | #include <linux/reiserfs_fs.h> | 7 | #include <linux/reiserfs_fs.h> |
8 | #include <linux/reiserfs_acl.h> | 8 | #include <linux/reiserfs_acl.h> |
9 | #include <linux/reiserfs_xattr.h> | 9 | #include <linux/reiserfs_xattr.h> |
10 | #include <linux/exportfs.h> | 10 | #include <linux/exportfs.h> |
11 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
12 | #include <linux/highmem.h> | 12 | #include <linux/highmem.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
15 | #include <asm/unaligned.h> | 15 | #include <asm/unaligned.h> |
16 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
17 | #include <linux/mpage.h> | 17 | #include <linux/mpage.h> |
18 | #include <linux/writeback.h> | 18 | #include <linux/writeback.h> |
19 | #include <linux/quotaops.h> | 19 | #include <linux/quotaops.h> |
20 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
21 | 21 | ||
22 | int reiserfs_commit_write(struct file *f, struct page *page, | 22 | int reiserfs_commit_write(struct file *f, struct page *page, |
23 | unsigned from, unsigned to); | 23 | unsigned from, unsigned to); |
24 | 24 | ||
25 | void reiserfs_evict_inode(struct inode *inode) | 25 | void reiserfs_evict_inode(struct inode *inode) |
26 | { | 26 | { |
27 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ | 27 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ |
28 | int jbegin_count = | 28 | int jbegin_count = |
29 | JOURNAL_PER_BALANCE_CNT * 2 + | 29 | JOURNAL_PER_BALANCE_CNT * 2 + |
30 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); | 30 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); |
31 | struct reiserfs_transaction_handle th; | 31 | struct reiserfs_transaction_handle th; |
32 | int depth; | 32 | int depth; |
33 | int err; | 33 | int err; |
34 | 34 | ||
35 | if (!inode->i_nlink && !is_bad_inode(inode)) | 35 | if (!inode->i_nlink && !is_bad_inode(inode)) |
36 | dquot_initialize(inode); | 36 | dquot_initialize(inode); |
37 | 37 | ||
38 | truncate_inode_pages(&inode->i_data, 0); | 38 | truncate_inode_pages(&inode->i_data, 0); |
39 | if (inode->i_nlink) | 39 | if (inode->i_nlink) |
40 | goto no_delete; | 40 | goto no_delete; |
41 | 41 | ||
42 | depth = reiserfs_write_lock_once(inode->i_sb); | 42 | depth = reiserfs_write_lock_once(inode->i_sb); |
43 | 43 | ||
44 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ | 44 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ |
45 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ | 45 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ |
46 | reiserfs_delete_xattrs(inode); | 46 | reiserfs_delete_xattrs(inode); |
47 | 47 | ||
48 | if (journal_begin(&th, inode->i_sb, jbegin_count)) | 48 | if (journal_begin(&th, inode->i_sb, jbegin_count)) |
49 | goto out; | 49 | goto out; |
50 | reiserfs_update_inode_transaction(inode); | 50 | reiserfs_update_inode_transaction(inode); |
51 | 51 | ||
52 | reiserfs_discard_prealloc(&th, inode); | 52 | reiserfs_discard_prealloc(&th, inode); |
53 | 53 | ||
54 | err = reiserfs_delete_object(&th, inode); | 54 | err = reiserfs_delete_object(&th, inode); |
55 | 55 | ||
56 | /* Do quota update inside a transaction for journaled quotas. We must do that | 56 | /* Do quota update inside a transaction for journaled quotas. We must do that |
57 | * after delete_object so that quota updates go into the same transaction as | 57 | * after delete_object so that quota updates go into the same transaction as |
58 | * stat data deletion */ | 58 | * stat data deletion */ |
59 | if (!err) | 59 | if (!err) |
60 | dquot_free_inode(inode); | 60 | dquot_free_inode(inode); |
61 | 61 | ||
62 | if (journal_end(&th, inode->i_sb, jbegin_count)) | 62 | if (journal_end(&th, inode->i_sb, jbegin_count)) |
63 | goto out; | 63 | goto out; |
64 | 64 | ||
65 | /* check return value from reiserfs_delete_object after | 65 | /* check return value from reiserfs_delete_object after |
66 | * ending the transaction | 66 | * ending the transaction |
67 | */ | 67 | */ |
68 | if (err) | 68 | if (err) |
69 | goto out; | 69 | goto out; |
70 | 70 | ||
71 | /* all items of file are deleted, so we can remove "save" link */ | 71 | /* all items of file are deleted, so we can remove "save" link */ |
72 | remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything | 72 | remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything |
73 | * about an error here */ | 73 | * about an error here */ |
74 | } else { | 74 | } else { |
75 | /* no object items are in the tree */ | 75 | /* no object items are in the tree */ |
76 | ; | 76 | ; |
77 | } | 77 | } |
78 | out: | 78 | out: |
79 | end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ | 79 | end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ |
80 | dquot_drop(inode); | 80 | dquot_drop(inode); |
81 | inode->i_blocks = 0; | 81 | inode->i_blocks = 0; |
82 | reiserfs_write_unlock_once(inode->i_sb, depth); | 82 | reiserfs_write_unlock_once(inode->i_sb, depth); |
83 | return; | 83 | return; |
84 | 84 | ||
85 | no_delete: | 85 | no_delete: |
86 | end_writeback(inode); | 86 | end_writeback(inode); |
87 | dquot_drop(inode); | 87 | dquot_drop(inode); |
88 | } | 88 | } |
89 | 89 | ||
90 | static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, | 90 | static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, |
91 | __u32 objectid, loff_t offset, int type, int length) | 91 | __u32 objectid, loff_t offset, int type, int length) |
92 | { | 92 | { |
93 | key->version = version; | 93 | key->version = version; |
94 | 94 | ||
95 | key->on_disk_key.k_dir_id = dirid; | 95 | key->on_disk_key.k_dir_id = dirid; |
96 | key->on_disk_key.k_objectid = objectid; | 96 | key->on_disk_key.k_objectid = objectid; |
97 | set_cpu_key_k_offset(key, offset); | 97 | set_cpu_key_k_offset(key, offset); |
98 | set_cpu_key_k_type(key, type); | 98 | set_cpu_key_k_type(key, type); |
99 | key->key_length = length; | 99 | key->key_length = length; |
100 | } | 100 | } |
101 | 101 | ||
102 | /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set | 102 | /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set |
103 | offset and type of key */ | 103 | offset and type of key */ |
104 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, | 104 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, |
105 | int type, int length) | 105 | int type, int length) |
106 | { | 106 | { |
107 | _make_cpu_key(key, get_inode_item_key_version(inode), | 107 | _make_cpu_key(key, get_inode_item_key_version(inode), |
108 | le32_to_cpu(INODE_PKEY(inode)->k_dir_id), | 108 | le32_to_cpu(INODE_PKEY(inode)->k_dir_id), |
109 | le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, | 109 | le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type, |
110 | length); | 110 | length); |
111 | } | 111 | } |
112 | 112 | ||
113 | // | 113 | // |
114 | // when key is 0, do not set version and short key | 114 | // when key is 0, do not set version and short key |
115 | // | 115 | // |
116 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | 116 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, |
117 | int version, | 117 | int version, |
118 | loff_t offset, int type, int length, | 118 | loff_t offset, int type, int length, |
119 | int entry_count /*or ih_free_space */ ) | 119 | int entry_count /*or ih_free_space */ ) |
120 | { | 120 | { |
121 | if (key) { | 121 | if (key) { |
122 | ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); | 122 | ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id); |
123 | ih->ih_key.k_objectid = | 123 | ih->ih_key.k_objectid = |
124 | cpu_to_le32(key->on_disk_key.k_objectid); | 124 | cpu_to_le32(key->on_disk_key.k_objectid); |
125 | } | 125 | } |
126 | put_ih_version(ih, version); | 126 | put_ih_version(ih, version); |
127 | set_le_ih_k_offset(ih, offset); | 127 | set_le_ih_k_offset(ih, offset); |
128 | set_le_ih_k_type(ih, type); | 128 | set_le_ih_k_type(ih, type); |
129 | put_ih_item_len(ih, length); | 129 | put_ih_item_len(ih, length); |
130 | /* set_ih_free_space (ih, 0); */ | 130 | /* set_ih_free_space (ih, 0); */ |
131 | // for directory items it is entry count, for directs and stat | 131 | // for directory items it is entry count, for directs and stat |
132 | // datas - 0xffff, for indirects - 0 | 132 | // datas - 0xffff, for indirects - 0 |
133 | put_ih_entry_count(ih, entry_count); | 133 | put_ih_entry_count(ih, entry_count); |
134 | } | 134 | } |
135 | 135 | ||
136 | // | 136 | // |
137 | // FIXME: we might cache recently accessed indirect item | 137 | // FIXME: we might cache recently accessed indirect item |
138 | 138 | ||
139 | // Ugh. Not too eager for that.... | 139 | // Ugh. Not too eager for that.... |
140 | // I cut the code until such time as I see a convincing argument (benchmark). | 140 | // I cut the code until such time as I see a convincing argument (benchmark). |
141 | // I don't want a bloated inode struct..., and I don't like code complexity.... | 141 | // I don't want a bloated inode struct..., and I don't like code complexity.... |
142 | 142 | ||
143 | /* cutting the code is fine, since it really isn't in use yet and is easy | 143 | /* cutting the code is fine, since it really isn't in use yet and is easy |
144 | ** to add back in. But, Vladimir has a really good idea here. Think | 144 | ** to add back in. But, Vladimir has a really good idea here. Think |
145 | ** about what happens for reading a file. For each page, | 145 | ** about what happens for reading a file. For each page, |
146 | ** The VFS layer calls reiserfs_readpage, who searches the tree to find | 146 | ** The VFS layer calls reiserfs_readpage, who searches the tree to find |
147 | ** an indirect item. This indirect item has X number of pointers, where | 147 | ** an indirect item. This indirect item has X number of pointers, where |
148 | ** X is a big number if we've done the block allocation right. But, | 148 | ** X is a big number if we've done the block allocation right. But, |
149 | ** we only use one or two of these pointers during each call to readpage, | 149 | ** we only use one or two of these pointers during each call to readpage, |
150 | ** needlessly researching again later on. | 150 | ** needlessly researching again later on. |
151 | ** | 151 | ** |
152 | ** The size of the cache could be dynamic based on the size of the file. | 152 | ** The size of the cache could be dynamic based on the size of the file. |
153 | ** | 153 | ** |
154 | ** I'd also like to see us cache the location the stat data item, since | 154 | ** I'd also like to see us cache the location the stat data item, since |
155 | ** we are needlessly researching for that frequently. | 155 | ** we are needlessly researching for that frequently. |
156 | ** | 156 | ** |
157 | ** --chris | 157 | ** --chris |
158 | */ | 158 | */ |
159 | 159 | ||
160 | /* If this page has a file tail in it, and | 160 | /* If this page has a file tail in it, and |
161 | ** it was read in by get_block_create_0, the page data is valid, | 161 | ** it was read in by get_block_create_0, the page data is valid, |
162 | ** but tail is still sitting in a direct item, and we can't write to | 162 | ** but tail is still sitting in a direct item, and we can't write to |
163 | ** it. So, look through this page, and check all the mapped buffers | 163 | ** it. So, look through this page, and check all the mapped buffers |
164 | ** to make sure they have valid block numbers. Any that don't need | 164 | ** to make sure they have valid block numbers. Any that don't need |
165 | ** to be unmapped, so that __block_write_begin will correctly call | 165 | ** to be unmapped, so that __block_write_begin will correctly call |
166 | ** reiserfs_get_block to convert the tail into an unformatted node | 166 | ** reiserfs_get_block to convert the tail into an unformatted node |
167 | */ | 167 | */ |
168 | static inline void fix_tail_page_for_writing(struct page *page) | 168 | static inline void fix_tail_page_for_writing(struct page *page) |
169 | { | 169 | { |
170 | struct buffer_head *head, *next, *bh; | 170 | struct buffer_head *head, *next, *bh; |
171 | 171 | ||
172 | if (page && page_has_buffers(page)) { | 172 | if (page && page_has_buffers(page)) { |
173 | head = page_buffers(page); | 173 | head = page_buffers(page); |
174 | bh = head; | 174 | bh = head; |
175 | do { | 175 | do { |
176 | next = bh->b_this_page; | 176 | next = bh->b_this_page; |
177 | if (buffer_mapped(bh) && bh->b_blocknr == 0) { | 177 | if (buffer_mapped(bh) && bh->b_blocknr == 0) { |
178 | reiserfs_unmap_buffer(bh); | 178 | reiserfs_unmap_buffer(bh); |
179 | } | 179 | } |
180 | bh = next; | 180 | bh = next; |
181 | } while (bh != head); | 181 | } while (bh != head); |
182 | } | 182 | } |
183 | } | 183 | } |
184 | 184 | ||
185 | /* reiserfs_get_block does not need to allocate a block only if it has been | 185 | /* reiserfs_get_block does not need to allocate a block only if it has been |
186 | done already or non-hole position has been found in the indirect item */ | 186 | done already or non-hole position has been found in the indirect item */ |
187 | static inline int allocation_needed(int retval, b_blocknr_t allocated, | 187 | static inline int allocation_needed(int retval, b_blocknr_t allocated, |
188 | struct item_head *ih, | 188 | struct item_head *ih, |
189 | __le32 * item, int pos_in_item) | 189 | __le32 * item, int pos_in_item) |
190 | { | 190 | { |
191 | if (allocated) | 191 | if (allocated) |
192 | return 0; | 192 | return 0; |
193 | if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && | 193 | if (retval == POSITION_FOUND && is_indirect_le_ih(ih) && |
194 | get_block_num(item, pos_in_item)) | 194 | get_block_num(item, pos_in_item)) |
195 | return 0; | 195 | return 0; |
196 | return 1; | 196 | return 1; |
197 | } | 197 | } |
198 | 198 | ||
199 | static inline int indirect_item_found(int retval, struct item_head *ih) | 199 | static inline int indirect_item_found(int retval, struct item_head *ih) |
200 | { | 200 | { |
201 | return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); | 201 | return (retval == POSITION_FOUND) && is_indirect_le_ih(ih); |
202 | } | 202 | } |
203 | 203 | ||
204 | static inline void set_block_dev_mapped(struct buffer_head *bh, | 204 | static inline void set_block_dev_mapped(struct buffer_head *bh, |
205 | b_blocknr_t block, struct inode *inode) | 205 | b_blocknr_t block, struct inode *inode) |
206 | { | 206 | { |
207 | map_bh(bh, inode->i_sb, block); | 207 | map_bh(bh, inode->i_sb, block); |
208 | } | 208 | } |
209 | 209 | ||
210 | // | 210 | // |
211 | // files which were created in the earlier version can not be longer, | 211 | // files which were created in the earlier version can not be longer, |
212 | // than 2 gb | 212 | // than 2 gb |
213 | // | 213 | // |
214 | static int file_capable(struct inode *inode, sector_t block) | 214 | static int file_capable(struct inode *inode, sector_t block) |
215 | { | 215 | { |
216 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. | 216 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. |
217 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb | 217 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb |
218 | return 1; | 218 | return 1; |
219 | 219 | ||
220 | return 0; | 220 | return 0; |
221 | } | 221 | } |
222 | 222 | ||
223 | static int restart_transaction(struct reiserfs_transaction_handle *th, | 223 | static int restart_transaction(struct reiserfs_transaction_handle *th, |
224 | struct inode *inode, struct treepath *path) | 224 | struct inode *inode, struct treepath *path) |
225 | { | 225 | { |
226 | struct super_block *s = th->t_super; | 226 | struct super_block *s = th->t_super; |
227 | int len = th->t_blocks_allocated; | 227 | int len = th->t_blocks_allocated; |
228 | int err; | 228 | int err; |
229 | 229 | ||
230 | BUG_ON(!th->t_trans_id); | 230 | BUG_ON(!th->t_trans_id); |
231 | BUG_ON(!th->t_refcount); | 231 | BUG_ON(!th->t_refcount); |
232 | 232 | ||
233 | pathrelse(path); | 233 | pathrelse(path); |
234 | 234 | ||
235 | /* we cannot restart while nested */ | 235 | /* we cannot restart while nested */ |
236 | if (th->t_refcount > 1) { | 236 | if (th->t_refcount > 1) { |
237 | return 0; | 237 | return 0; |
238 | } | 238 | } |
239 | reiserfs_update_sd(th, inode); | 239 | reiserfs_update_sd(th, inode); |
240 | err = journal_end(th, s, len); | 240 | err = journal_end(th, s, len); |
241 | if (!err) { | 241 | if (!err) { |
242 | err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); | 242 | err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); |
243 | if (!err) | 243 | if (!err) |
244 | reiserfs_update_inode_transaction(inode); | 244 | reiserfs_update_inode_transaction(inode); |
245 | } | 245 | } |
246 | return err; | 246 | return err; |
247 | } | 247 | } |
248 | 248 | ||
// Called by reiserfs_get_block() when create == 0, i.e. for lookups that
// must not allocate anything.  For an indirect item it maps bh_result to
// the on-disk block number of the 'block'-th logical block of the file.
// When the lookup lands in a direct item (a packed file tail) there is no
// real disk block to report: when called from bmap the function returns
// without mapping anything, otherwise it copies the tail bytes out of the
// direct item(s) into the piece of page behind bh_result.
256 | 256 | ||
static int _get_block_create_0(struct inode *inode, sector_t block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	b_blocknr_t blocknr;
	char *p = NULL;		/* kmap()ed address of bh_result's page, once needed */
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	// prepare the key to look for the 'block'-th block of file
	// (reiserfs key offsets are 1-based, hence the "+ 1")
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		/* p is still NULL on this path; the check is kept for
		 * symmetry with the later exits */
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		// We do not return -ENOENT if there is a hole but page is uptodate, because it means
		// that there is some mmapped data associated with it that is yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}
	// the item covering 'block' was found; it is either indirect or direct
	bh = get_last_bh(&path);
	ih = get_ih(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);

		/* FIXME: here we could cache indirect item or part of it in
		   the inode to avoid search_by_key in case of subsequent
		   access to file */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			/* last pointer of the indirect item: tell callers
			 * this is a mapping boundary */
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			// zero pointer == hole.
			// We do not return -ENOENT if there is a hole but page is uptodate, because it means
			// that there is some mmapped data associated with it that is yet to be written to disk.
			if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
			ret = -ENOENT;
		}

		pathrelse(&path);
		/* p is NULL on this path too (kmap happens only for tails) */
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	// requested data are in direct item(s)
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		// we are called by bmap. FIXME: we can not map block of file
		// when it is stored in direct item(s)
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/* if we've got a direct item, and the buffer or page was uptodate,
	** we don't want to pull data off disk again.  skip to the
	** end, where we map the buffer and return
	*/
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		** grab_tail_page can trigger calls to reiserfs_get_block on up to date
		** pages without any buffers.  If the page is up to date, we don't want
		** read old data off disk.  Set the up to date bit on the buffer instead
		** and jump to the end
		*/
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	// read file tail into part of page; offset of the tail within the page
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
	copy_item_head(&tmp_ih, ih);

	/* we only want to kmap if we are reading the tail into the page.
	** this is not the common case, so we don't kmap until we are
	** sure we need to.  But, this means the item might move if
	** kmap schedules
	*/
	if (!p)
		p = (char *)kmap(bh_result->b_page);

	p += offset;
	/* zero-fill first so bytes past EOF / past the item read as zeroes */
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/* make sure we don't read more bytes than actually exist in
		** the file.  This can happen in odd cases where i_size isn't
		** correct, and when direct item padding results in a few
		** extra bytes at the end of the direct item
		*/
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			/* item extends past EOF: copy only up to i_size */
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			// we done, if read direct item is not the last item of
			// node FIXME: we could try to check right delimiting key
			// to see whether direct item continues in the right
			// neighbor or rely on i_size
			break;

		// update key to look for the next piece
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			// i/o error most likely
			break;
		bh = get_last_bh(&path);
		ih = get_ih(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

      finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/* this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}
422 | 422 | ||
// reiserfs_bmap() is used to build the file map, so _get_block_create_0()
// is called with args == 0 and will not read direct item (tail) contents.
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	/* reject logical blocks the file format cannot address; see
	 * file_capable() (old 3.5-format files are limited to 2GB) */
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item: args == 0, so a tail stored in a
	 * direct item is simply not mapped */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	/* the return value of _get_block_create_0 is intentionally ignored;
	 * bmap callers only inspect the mapping left in bh_result */
	return 0;
}
437 | 437 | ||
438 | /* special version of get_block that is only used by grab_tail_page right | 438 | /* special version of get_block that is only used by grab_tail_page right |
439 | ** now. It is sent to __block_write_begin, and when you try to get a | 439 | ** now. It is sent to __block_write_begin, and when you try to get a |
440 | ** block past the end of the file (or a block from a hole) it returns | 440 | ** block past the end of the file (or a block from a hole) it returns |
441 | ** -ENOENT instead of a valid buffer. __block_write_begin expects to | 441 | ** -ENOENT instead of a valid buffer. __block_write_begin expects to |
442 | ** be able to do i/o on the buffers returned, unless an error value | 442 | ** be able to do i/o on the buffers returned, unless an error value |
443 | ** is also returned. | 443 | ** is also returned. |
444 | ** | 444 | ** |
445 | ** So, this allows __block_write_begin to be used for reading a single block | 445 | ** So, this allows __block_write_begin to be used for reading a single block |
446 | ** in a page. Where it does not produce a valid page for holes, or past the | 446 | ** in a page. Where it does not produce a valid page for holes, or past the |
447 | ** end of the file. This turns out to be exactly what we need for reading | 447 | ** end of the file. This turns out to be exactly what we need for reading |
448 | ** tails for conversion. | 448 | ** tails for conversion. |
449 | ** | 449 | ** |
450 | ** The point of the wrapper is forcing a certain value for create, even | 450 | ** The point of the wrapper is forcing a certain value for create, even |
451 | ** though the VFS layer is calling this function with create==1. If you | 451 | ** though the VFS layer is calling this function with create==1. If you |
452 | ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, | 452 | ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, |
453 | ** don't use this function. | 453 | ** don't use this function. |
454 | */ | 454 | */ |
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	/* deliberately drop the VFS-supplied 'create' and force
	 * GET_BLOCK_NO_HOLE: holes and blocks past EOF must come back as
	 * -ENOENT instead of being allocated (see comment above) */
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}
461 | 461 | ||
462 | /* This is special helper for reiserfs_get_block in case we are executing | 462 | /* This is special helper for reiserfs_get_block in case we are executing |
463 | direct_IO request. */ | 463 | direct_IO request. */ |
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	/* direct I/O buffers have no page-cache page behind them */
	bh_result->b_page = NULL;

	/* We set the b_size before reiserfs_get_block call since it is
	   referenced in convert_tail_for_hole() that may be called from
	   reiserfs_get_block() */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages: block number 0 marks a
	 * buffer that only holds tail data (see _get_block_create_0) */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* make sure future calls to the direct io funcs for this offset
		** in the file fail by unmapping the buffer
		*/
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}
	/* Possible unpacked tail. Flush the data before pages have
	   disappeared */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;

		reiserfs_write_lock(inode->i_sb);

		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;

		reiserfs_write_unlock(inode->i_sb);

		/* a commit failure overrides any earlier -EINVAL */
		if (err < 0)
			ret = err;
	}
      out:
	return ret;
}
509 | 509 | ||
510 | /* | 510 | /* |
511 | ** helper function for when reiserfs_get_block is called for a hole | 511 | ** helper function for when reiserfs_get_block is called for a hole |
512 | ** but the file tail is still in a direct item | 512 | ** but the file tail is still in a direct item |
513 | ** bh_result is the buffer head for the hole | 513 | ** bh_result is the buffer head for the hole |
514 | ** tail_offset is the offset of the start of the tail in the file | 514 | ** tail_offset is the offset of the start of the tail in the file |
515 | ** | 515 | ** |
516 | ** This calls prepare_write, which will start a new transaction | 516 | ** This calls prepare_write, which will start a new transaction |
517 | ** you should not be in a transaction, or have any paths held when you | 517 | ** you should not be in a transaction, or have any paths held when you |
518 | ** call this. | 518 | ** call this. |
519 | */ | 519 | */ |
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	/* tail_offset is a reiserfs key offset, which is 1-based: a valid
	 * tail must start exactly at the beginning of a block */
	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_CACHE_SHIFT;
	/* hole_page can be zero in case of direct_io, we are sure
	   that we cannot get here if we write with O_DIRECT into
	   tail page */
	if (!hole_page || index != hole_page->index) {
		/* the tail lives on a different page than the hole:
		 * lock it in the page cache ourselves */
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/* we don't have to make sure the conversion did not happen while
	** we were locking the page because anyone that could convert
	** must first take i_mutex.
	**
	** We must fix the tail page for writing because it might have buffers
	** that are mapped, but have a block number of 0.  This indicates tail
	** data that has been read directly into the page, and
	** __block_write_begin won't trigger a get_block in this case.
	*/
	fix_tail_page_for_writing(tail_page);
	retval = __reiserfs_write_begin(tail_page, tail_start,
					tail_end - tail_start);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

      unlock:
	/* only release the page if we grabbed it above */
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		page_cache_release(tail_page);
	}
      out:
	return retval;
}
580 | 580 | ||
581 | static inline int _allocate_block(struct reiserfs_transaction_handle *th, | 581 | static inline int _allocate_block(struct reiserfs_transaction_handle *th, |
582 | sector_t block, | 582 | sector_t block, |
583 | struct inode *inode, | 583 | struct inode *inode, |
584 | b_blocknr_t * allocated_block_nr, | 584 | b_blocknr_t * allocated_block_nr, |
585 | struct treepath *path, int flags) | 585 | struct treepath *path, int flags) |
586 | { | 586 | { |
587 | BUG_ON(!th->t_trans_id); | 587 | BUG_ON(!th->t_trans_id); |
588 | 588 | ||
589 | #ifdef REISERFS_PREALLOCATE | 589 | #ifdef REISERFS_PREALLOCATE |
590 | if (!(flags & GET_BLOCK_NO_IMUX)) { | 590 | if (!(flags & GET_BLOCK_NO_IMUX)) { |
591 | return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, | 591 | return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, |
592 | path, block); | 592 | path, block); |
593 | } | 593 | } |
594 | #endif | 594 | #endif |
595 | return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, | 595 | return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path, |
596 | block); | 596 | block); |
597 | } | 597 | } |
598 | 598 | ||
599 | int reiserfs_get_block(struct inode *inode, sector_t block, | 599 | int reiserfs_get_block(struct inode *inode, sector_t block, |
600 | struct buffer_head *bh_result, int create) | 600 | struct buffer_head *bh_result, int create) |
601 | { | 601 | { |
602 | int repeat, retval = 0; | 602 | int repeat, retval = 0; |
603 | b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int | 603 | b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int |
604 | INITIALIZE_PATH(path); | 604 | INITIALIZE_PATH(path); |
605 | int pos_in_item; | 605 | int pos_in_item; |
606 | struct cpu_key key; | 606 | struct cpu_key key; |
607 | struct buffer_head *bh, *unbh = NULL; | 607 | struct buffer_head *bh, *unbh = NULL; |
608 | struct item_head *ih, tmp_ih; | 608 | struct item_head *ih, tmp_ih; |
609 | __le32 *item; | 609 | __le32 *item; |
610 | int done; | 610 | int done; |
611 | int fs_gen; | 611 | int fs_gen; |
612 | int lock_depth; | 612 | int lock_depth; |
613 | struct reiserfs_transaction_handle *th = NULL; | 613 | struct reiserfs_transaction_handle *th = NULL; |
614 | /* space reserved in transaction batch: | 614 | /* space reserved in transaction batch: |
615 | . 3 balancings in direct->indirect conversion | 615 | . 3 balancings in direct->indirect conversion |
616 | . 1 block involved into reiserfs_update_sd() | 616 | . 1 block involved into reiserfs_update_sd() |
617 | XXX in practically impossible worst case direct2indirect() | 617 | XXX in practically impossible worst case direct2indirect() |
618 | can incur (much) more than 3 balancings. | 618 | can incur (much) more than 3 balancings. |
619 | quota update for user, group */ | 619 | quota update for user, group */ |
620 | int jbegin_count = | 620 | int jbegin_count = |
621 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + | 621 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + |
622 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); | 622 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); |
623 | int version; | 623 | int version; |
624 | int dangle = 1; | 624 | int dangle = 1; |
625 | loff_t new_offset = | 625 | loff_t new_offset = |
626 | (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; | 626 | (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; |
627 | 627 | ||
628 | lock_depth = reiserfs_write_lock_once(inode->i_sb); | 628 | lock_depth = reiserfs_write_lock_once(inode->i_sb); |
629 | version = get_inode_item_key_version(inode); | 629 | version = get_inode_item_key_version(inode); |
630 | 630 | ||
631 | if (!file_capable(inode, block)) { | 631 | if (!file_capable(inode, block)) { |
632 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | 632 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
633 | return -EFBIG; | 633 | return -EFBIG; |
634 | } | 634 | } |
635 | 635 | ||
636 | /* if !create, we aren't changing the FS, so we don't need to | 636 | /* if !create, we aren't changing the FS, so we don't need to |
637 | ** log anything, so we don't need to start a transaction | 637 | ** log anything, so we don't need to start a transaction |
638 | */ | 638 | */ |
639 | if (!(create & GET_BLOCK_CREATE)) { | 639 | if (!(create & GET_BLOCK_CREATE)) { |
640 | int ret; | 640 | int ret; |
641 | /* find number of block-th logical block of the file */ | 641 | /* find number of block-th logical block of the file */ |
642 | ret = _get_block_create_0(inode, block, bh_result, | 642 | ret = _get_block_create_0(inode, block, bh_result, |
643 | create | GET_BLOCK_READ_DIRECT); | 643 | create | GET_BLOCK_READ_DIRECT); |
644 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | 644 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
645 | return ret; | 645 | return ret; |
646 | } | 646 | } |
647 | /* | 647 | /* |
648 | * if we're already in a transaction, make sure to close | 648 | * if we're already in a transaction, make sure to close |
649 | * any new transactions we start in this func | 649 | * any new transactions we start in this func |
650 | */ | 650 | */ |
651 | if ((create & GET_BLOCK_NO_DANGLE) || | 651 | if ((create & GET_BLOCK_NO_DANGLE) || |
652 | reiserfs_transaction_running(inode->i_sb)) | 652 | reiserfs_transaction_running(inode->i_sb)) |
653 | dangle = 0; | 653 | dangle = 0; |
654 | 654 | ||
655 | /* If file is of such a size, that it might have a tail and tails are enabled | 655 | /* If file is of such a size, that it might have a tail and tails are enabled |
656 | ** we should mark it as possibly needing tail packing on close | 656 | ** we should mark it as possibly needing tail packing on close |
657 | */ | 657 | */ |
658 | if ((have_large_tails(inode->i_sb) | 658 | if ((have_large_tails(inode->i_sb) |
659 | && inode->i_size < i_block_size(inode) * 4) | 659 | && inode->i_size < i_block_size(inode) * 4) |
660 | || (have_small_tails(inode->i_sb) | 660 | || (have_small_tails(inode->i_sb) |
661 | && inode->i_size < i_block_size(inode))) | 661 | && inode->i_size < i_block_size(inode))) |
662 | REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; | 662 | REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; |
663 | 663 | ||
664 | /* set the key of the first byte in the 'block'-th block of file */ | 664 | /* set the key of the first byte in the 'block'-th block of file */ |
665 | make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); | 665 | make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); |
666 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { | 666 | if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { |
667 | start_trans: | 667 | start_trans: |
668 | th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); | 668 | th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); |
669 | if (!th) { | 669 | if (!th) { |
670 | retval = -ENOMEM; | 670 | retval = -ENOMEM; |
671 | goto failure; | 671 | goto failure; |
672 | } | 672 | } |
673 | reiserfs_update_inode_transaction(inode); | 673 | reiserfs_update_inode_transaction(inode); |
674 | } | 674 | } |
675 | research: | 675 | research: |
676 | 676 | ||
677 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 677 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
678 | if (retval == IO_ERROR) { | 678 | if (retval == IO_ERROR) { |
679 | retval = -EIO; | 679 | retval = -EIO; |
680 | goto failure; | 680 | goto failure; |
681 | } | 681 | } |
682 | 682 | ||
683 | bh = get_last_bh(&path); | 683 | bh = get_last_bh(&path); |
684 | ih = get_ih(&path); | 684 | ih = get_ih(&path); |
685 | item = get_item(&path); | 685 | item = get_item(&path); |
686 | pos_in_item = path.pos_in_item; | 686 | pos_in_item = path.pos_in_item; |
687 | 687 | ||
688 | fs_gen = get_generation(inode->i_sb); | 688 | fs_gen = get_generation(inode->i_sb); |
689 | copy_item_head(&tmp_ih, ih); | 689 | copy_item_head(&tmp_ih, ih); |
690 | 690 | ||
691 | if (allocation_needed | 691 | if (allocation_needed |
692 | (retval, allocated_block_nr, ih, item, pos_in_item)) { | 692 | (retval, allocated_block_nr, ih, item, pos_in_item)) { |
693 | /* we have to allocate block for the unformatted node */ | 693 | /* we have to allocate block for the unformatted node */ |
694 | if (!th) { | 694 | if (!th) { |
695 | pathrelse(&path); | 695 | pathrelse(&path); |
696 | goto start_trans; | 696 | goto start_trans; |
697 | } | 697 | } |
698 | 698 | ||
699 | repeat = | 699 | repeat = |
700 | _allocate_block(th, block, inode, &allocated_block_nr, | 700 | _allocate_block(th, block, inode, &allocated_block_nr, |
701 | &path, create); | 701 | &path, create); |
702 | 702 | ||
703 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { | 703 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { |
704 | /* restart the transaction to give the journal a chance to free | 704 | /* restart the transaction to give the journal a chance to free |
705 | ** some blocks. releases the path, so we have to go back to | 705 | ** some blocks. releases the path, so we have to go back to |
706 | ** research if we succeed on the second try | 706 | ** research if we succeed on the second try |
707 | */ | 707 | */ |
708 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; | 708 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; |
709 | retval = restart_transaction(th, inode, &path); | 709 | retval = restart_transaction(th, inode, &path); |
710 | if (retval) | 710 | if (retval) |
711 | goto failure; | 711 | goto failure; |
712 | repeat = | 712 | repeat = |
713 | _allocate_block(th, block, inode, | 713 | _allocate_block(th, block, inode, |
714 | &allocated_block_nr, NULL, create); | 714 | &allocated_block_nr, NULL, create); |
715 | 715 | ||
716 | if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { | 716 | if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) { |
717 | goto research; | 717 | goto research; |
718 | } | 718 | } |
719 | if (repeat == QUOTA_EXCEEDED) | 719 | if (repeat == QUOTA_EXCEEDED) |
720 | retval = -EDQUOT; | 720 | retval = -EDQUOT; |
721 | else | 721 | else |
722 | retval = -ENOSPC; | 722 | retval = -ENOSPC; |
723 | goto failure; | 723 | goto failure; |
724 | } | 724 | } |
725 | 725 | ||
726 | if (fs_changed(fs_gen, inode->i_sb) | 726 | if (fs_changed(fs_gen, inode->i_sb) |
727 | && item_moved(&tmp_ih, &path)) { | 727 | && item_moved(&tmp_ih, &path)) { |
728 | goto research; | 728 | goto research; |
729 | } | 729 | } |
730 | } | 730 | } |
731 | 731 | ||
732 | if (indirect_item_found(retval, ih)) { | 732 | if (indirect_item_found(retval, ih)) { |
733 | b_blocknr_t unfm_ptr; | 733 | b_blocknr_t unfm_ptr; |
734 | /* 'block'-th block is in the file already (there is | 734 | /* 'block'-th block is in the file already (there is |
735 | corresponding cell in some indirect item). But it may be | 735 | corresponding cell in some indirect item). But it may be |
736 | zero unformatted node pointer (hole) */ | 736 | zero unformatted node pointer (hole) */ |
737 | unfm_ptr = get_block_num(item, pos_in_item); | 737 | unfm_ptr = get_block_num(item, pos_in_item); |
738 | if (unfm_ptr == 0) { | 738 | if (unfm_ptr == 0) { |
739 | /* use allocated block to plug the hole */ | 739 | /* use allocated block to plug the hole */ |
740 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); | 740 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); |
741 | if (fs_changed(fs_gen, inode->i_sb) | 741 | if (fs_changed(fs_gen, inode->i_sb) |
742 | && item_moved(&tmp_ih, &path)) { | 742 | && item_moved(&tmp_ih, &path)) { |
743 | reiserfs_restore_prepared_buffer(inode->i_sb, | 743 | reiserfs_restore_prepared_buffer(inode->i_sb, |
744 | bh); | 744 | bh); |
745 | goto research; | 745 | goto research; |
746 | } | 746 | } |
747 | set_buffer_new(bh_result); | 747 | set_buffer_new(bh_result); |
748 | if (buffer_dirty(bh_result) | 748 | if (buffer_dirty(bh_result) |
749 | && reiserfs_data_ordered(inode->i_sb)) | 749 | && reiserfs_data_ordered(inode->i_sb)) |
750 | reiserfs_add_ordered_list(inode, bh_result); | 750 | reiserfs_add_ordered_list(inode, bh_result); |
751 | put_block_num(item, pos_in_item, allocated_block_nr); | 751 | put_block_num(item, pos_in_item, allocated_block_nr); |
752 | unfm_ptr = allocated_block_nr; | 752 | unfm_ptr = allocated_block_nr; |
753 | journal_mark_dirty(th, inode->i_sb, bh); | 753 | journal_mark_dirty(th, inode->i_sb, bh); |
754 | reiserfs_update_sd(th, inode); | 754 | reiserfs_update_sd(th, inode); |
755 | } | 755 | } |
756 | set_block_dev_mapped(bh_result, unfm_ptr, inode); | 756 | set_block_dev_mapped(bh_result, unfm_ptr, inode); |
757 | pathrelse(&path); | 757 | pathrelse(&path); |
758 | retval = 0; | 758 | retval = 0; |
759 | if (!dangle && th) | 759 | if (!dangle && th) |
760 | retval = reiserfs_end_persistent_transaction(th); | 760 | retval = reiserfs_end_persistent_transaction(th); |
761 | 761 | ||
762 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | 762 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
763 | 763 | ||
764 | /* the item was found, so new blocks were not added to the file | 764 | /* the item was found, so new blocks were not added to the file |
765 | ** there is no need to make sure the inode is updated with this | 765 | ** there is no need to make sure the inode is updated with this |
766 | ** transaction | 766 | ** transaction |
767 | */ | 767 | */ |
768 | return retval; | 768 | return retval; |
769 | } | 769 | } |
770 | 770 | ||
771 | if (!th) { | 771 | if (!th) { |
772 | pathrelse(&path); | 772 | pathrelse(&path); |
773 | goto start_trans; | 773 | goto start_trans; |
774 | } | 774 | } |
775 | 775 | ||
776 | /* desired position is not found or is in the direct item. We have | 776 | /* desired position is not found or is in the direct item. We have |
777 | to append file with holes up to 'block'-th block converting | 777 | to append file with holes up to 'block'-th block converting |
778 | direct items to indirect one if necessary */ | 778 | direct items to indirect one if necessary */ |
779 | done = 0; | 779 | done = 0; |
780 | do { | 780 | do { |
781 | if (is_statdata_le_ih(ih)) { | 781 | if (is_statdata_le_ih(ih)) { |
782 | __le32 unp = 0; | 782 | __le32 unp = 0; |
783 | struct cpu_key tmp_key; | 783 | struct cpu_key tmp_key; |
784 | 784 | ||
785 | /* indirect item has to be inserted */ | 785 | /* indirect item has to be inserted */ |
786 | make_le_item_head(&tmp_ih, &key, version, 1, | 786 | make_le_item_head(&tmp_ih, &key, version, 1, |
787 | TYPE_INDIRECT, UNFM_P_SIZE, | 787 | TYPE_INDIRECT, UNFM_P_SIZE, |
788 | 0 /* free_space */ ); | 788 | 0 /* free_space */ ); |
789 | 789 | ||
790 | if (cpu_key_k_offset(&key) == 1) { | 790 | if (cpu_key_k_offset(&key) == 1) { |
791 | /* we are going to add 'block'-th block to the file. Use | 791 | /* we are going to add 'block'-th block to the file. Use |
792 | allocated block for that */ | 792 | allocated block for that */ |
793 | unp = cpu_to_le32(allocated_block_nr); | 793 | unp = cpu_to_le32(allocated_block_nr); |
794 | set_block_dev_mapped(bh_result, | 794 | set_block_dev_mapped(bh_result, |
795 | allocated_block_nr, inode); | 795 | allocated_block_nr, inode); |
796 | set_buffer_new(bh_result); | 796 | set_buffer_new(bh_result); |
797 | done = 1; | 797 | done = 1; |
798 | } | 798 | } |
799 | tmp_key = key; // ;) | 799 | tmp_key = key; // ;) |
800 | set_cpu_key_k_offset(&tmp_key, 1); | 800 | set_cpu_key_k_offset(&tmp_key, 1); |
801 | PATH_LAST_POSITION(&path)++; | 801 | PATH_LAST_POSITION(&path)++; |
802 | 802 | ||
803 | retval = | 803 | retval = |
804 | reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, | 804 | reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih, |
805 | inode, (char *)&unp); | 805 | inode, (char *)&unp); |
806 | if (retval) { | 806 | if (retval) { |
807 | reiserfs_free_block(th, inode, | 807 | reiserfs_free_block(th, inode, |
808 | allocated_block_nr, 1); | 808 | allocated_block_nr, 1); |
809 | goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST | 809 | goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST |
810 | } | 810 | } |
811 | //mark_tail_converted (inode); | 811 | //mark_tail_converted (inode); |
812 | } else if (is_direct_le_ih(ih)) { | 812 | } else if (is_direct_le_ih(ih)) { |
813 | /* direct item has to be converted */ | 813 | /* direct item has to be converted */ |
814 | loff_t tail_offset; | 814 | loff_t tail_offset; |
815 | 815 | ||
816 | tail_offset = | 816 | tail_offset = |
817 | ((le_ih_k_offset(ih) - | 817 | ((le_ih_k_offset(ih) - |
818 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; | 818 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; |
819 | if (tail_offset == cpu_key_k_offset(&key)) { | 819 | if (tail_offset == cpu_key_k_offset(&key)) { |
820 | /* direct item we just found fits into block we have | 820 | /* direct item we just found fits into block we have |
821 | to map. Convert it into unformatted node: use | 821 | to map. Convert it into unformatted node: use |
822 | bh_result for the conversion */ | 822 | bh_result for the conversion */ |
823 | set_block_dev_mapped(bh_result, | 823 | set_block_dev_mapped(bh_result, |
824 | allocated_block_nr, inode); | 824 | allocated_block_nr, inode); |
825 | unbh = bh_result; | 825 | unbh = bh_result; |
826 | done = 1; | 826 | done = 1; |
827 | } else { | 827 | } else { |
828 | /* we have to padd file tail stored in direct item(s) | 828 | /* we have to padd file tail stored in direct item(s) |
829 | up to block size and convert it to unformatted | 829 | up to block size and convert it to unformatted |
830 | node. FIXME: this should also get into page cache */ | 830 | node. FIXME: this should also get into page cache */ |
831 | 831 | ||
832 | pathrelse(&path); | 832 | pathrelse(&path); |
833 | /* | 833 | /* |
834 | * ugly, but we can only end the transaction if | 834 | * ugly, but we can only end the transaction if |
835 | * we aren't nested | 835 | * we aren't nested |
836 | */ | 836 | */ |
837 | BUG_ON(!th->t_refcount); | 837 | BUG_ON(!th->t_refcount); |
838 | if (th->t_refcount == 1) { | 838 | if (th->t_refcount == 1) { |
839 | retval = | 839 | retval = |
840 | reiserfs_end_persistent_transaction | 840 | reiserfs_end_persistent_transaction |
841 | (th); | 841 | (th); |
842 | th = NULL; | 842 | th = NULL; |
843 | if (retval) | 843 | if (retval) |
844 | goto failure; | 844 | goto failure; |
845 | } | 845 | } |
846 | 846 | ||
847 | retval = | 847 | retval = |
848 | convert_tail_for_hole(inode, bh_result, | 848 | convert_tail_for_hole(inode, bh_result, |
849 | tail_offset); | 849 | tail_offset); |
850 | if (retval) { | 850 | if (retval) { |
851 | if (retval != -ENOSPC) | 851 | if (retval != -ENOSPC) |
852 | reiserfs_error(inode->i_sb, | 852 | reiserfs_error(inode->i_sb, |
853 | "clm-6004", | 853 | "clm-6004", |
854 | "convert tail failed " | 854 | "convert tail failed " |
855 | "inode %lu, error %d", | 855 | "inode %lu, error %d", |
856 | inode->i_ino, | 856 | inode->i_ino, |
857 | retval); | 857 | retval); |
858 | if (allocated_block_nr) { | 858 | if (allocated_block_nr) { |
859 | /* the bitmap, the super, and the stat data == 3 */ | 859 | /* the bitmap, the super, and the stat data == 3 */ |
860 | if (!th) | 860 | if (!th) |
861 | th = reiserfs_persistent_transaction(inode->i_sb, 3); | 861 | th = reiserfs_persistent_transaction(inode->i_sb, 3); |
862 | if (th) | 862 | if (th) |
863 | reiserfs_free_block(th, | 863 | reiserfs_free_block(th, |
864 | inode, | 864 | inode, |
865 | allocated_block_nr, | 865 | allocated_block_nr, |
866 | 1); | 866 | 1); |
867 | } | 867 | } |
868 | goto failure; | 868 | goto failure; |
869 | } | 869 | } |
870 | goto research; | 870 | goto research; |
871 | } | 871 | } |
872 | retval = | 872 | retval = |
873 | direct2indirect(th, inode, &path, unbh, | 873 | direct2indirect(th, inode, &path, unbh, |
874 | tail_offset); | 874 | tail_offset); |
875 | if (retval) { | 875 | if (retval) { |
876 | reiserfs_unmap_buffer(unbh); | 876 | reiserfs_unmap_buffer(unbh); |
877 | reiserfs_free_block(th, inode, | 877 | reiserfs_free_block(th, inode, |
878 | allocated_block_nr, 1); | 878 | allocated_block_nr, 1); |
879 | goto failure; | 879 | goto failure; |
880 | } | 880 | } |
881 | /* it is important the set_buffer_uptodate is done after | 881 | /* it is important the set_buffer_uptodate is done after |
882 | ** the direct2indirect. The buffer might contain valid | 882 | ** the direct2indirect. The buffer might contain valid |
883 | ** data newer than the data on disk (read by readpage, changed, | 883 | ** data newer than the data on disk (read by readpage, changed, |
884 | ** and then sent here by writepage). direct2indirect needs | 884 | ** and then sent here by writepage). direct2indirect needs |
885 | ** to know if unbh was already up to date, so it can decide | 885 | ** to know if unbh was already up to date, so it can decide |
886 | ** if the data in unbh needs to be replaced with data from | 886 | ** if the data in unbh needs to be replaced with data from |
887 | ** the disk | 887 | ** the disk |
888 | */ | 888 | */ |
889 | set_buffer_uptodate(unbh); | 889 | set_buffer_uptodate(unbh); |
890 | 890 | ||
891 | /* unbh->b_page == NULL in case of DIRECT_IO request, this means | 891 | /* unbh->b_page == NULL in case of DIRECT_IO request, this means |
892 | buffer will disappear shortly, so it should not be added to | 892 | buffer will disappear shortly, so it should not be added to |
893 | */ | 893 | */ |
894 | if (unbh->b_page) { | 894 | if (unbh->b_page) { |
895 | /* we've converted the tail, so we must | 895 | /* we've converted the tail, so we must |
896 | ** flush unbh before the transaction commits | 896 | ** flush unbh before the transaction commits |
897 | */ | 897 | */ |
898 | reiserfs_add_tail_list(inode, unbh); | 898 | reiserfs_add_tail_list(inode, unbh); |
899 | 899 | ||
900 | /* mark it dirty now to prevent commit_write from adding | 900 | /* mark it dirty now to prevent commit_write from adding |
901 | ** this buffer to the inode's dirty buffer list | 901 | ** this buffer to the inode's dirty buffer list |
902 | */ | 902 | */ |
903 | /* | 903 | /* |
904 | * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). | 904 | * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). |
905 | * It's still atomic, but it sets the page dirty too, | 905 | * It's still atomic, but it sets the page dirty too, |
906 | * which makes it eligible for writeback at any time by the | 906 | * which makes it eligible for writeback at any time by the |
907 | * VM (which was also the case with __mark_buffer_dirty()) | 907 | * VM (which was also the case with __mark_buffer_dirty()) |
908 | */ | 908 | */ |
909 | mark_buffer_dirty(unbh); | 909 | mark_buffer_dirty(unbh); |
910 | } | 910 | } |
911 | } else { | 911 | } else { |
912 | /* append indirect item with holes if needed, when appending | 912 | /* append indirect item with holes if needed, when appending |
913 | pointer to 'block'-th block use block, which is already | 913 | pointer to 'block'-th block use block, which is already |
914 | allocated */ | 914 | allocated */ |
915 | struct cpu_key tmp_key; | 915 | struct cpu_key tmp_key; |
916 | unp_t unf_single = 0; // We use this in case we need to allocate only | 916 | unp_t unf_single = 0; // We use this in case we need to allocate only |
917 | // one block which is a fastpath | 917 | // one block which is a fastpath |
918 | unp_t *un; | 918 | unp_t *un; |
919 | __u64 max_to_insert = | 919 | __u64 max_to_insert = |
920 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / | 920 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / |
921 | UNFM_P_SIZE; | 921 | UNFM_P_SIZE; |
922 | __u64 blocks_needed; | 922 | __u64 blocks_needed; |
923 | 923 | ||
924 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, | 924 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, |
925 | "vs-804: invalid position for append"); | 925 | "vs-804: invalid position for append"); |
926 | /* indirect item has to be appended, set up key of that position */ | 926 | /* indirect item has to be appended, set up key of that position */ |
927 | make_cpu_key(&tmp_key, inode, | 927 | make_cpu_key(&tmp_key, inode, |
928 | le_key_k_offset(version, | 928 | le_key_k_offset(version, |
929 | &(ih->ih_key)) + | 929 | &(ih->ih_key)) + |
930 | op_bytes_number(ih, | 930 | op_bytes_number(ih, |
931 | inode->i_sb->s_blocksize), | 931 | inode->i_sb->s_blocksize), |
932 | //pos_in_item * inode->i_sb->s_blocksize, | 932 | //pos_in_item * inode->i_sb->s_blocksize, |
933 | TYPE_INDIRECT, 3); // key type is unimportant | 933 | TYPE_INDIRECT, 3); // key type is unimportant |
934 | 934 | ||
935 | RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), | 935 | RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), |
936 | "green-805: invalid offset"); | 936 | "green-805: invalid offset"); |
937 | blocks_needed = | 937 | blocks_needed = |
938 | 1 + | 938 | 1 + |
939 | ((cpu_key_k_offset(&key) - | 939 | ((cpu_key_k_offset(&key) - |
940 | cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> | 940 | cpu_key_k_offset(&tmp_key)) >> inode->i_sb-> |
941 | s_blocksize_bits); | 941 | s_blocksize_bits); |
942 | 942 | ||
943 | if (blocks_needed == 1) { | 943 | if (blocks_needed == 1) { |
944 | un = &unf_single; | 944 | un = &unf_single; |
945 | } else { | 945 | } else { |
946 | un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS); | 946 | un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS); |
947 | if (!un) { | 947 | if (!un) { |
948 | un = &unf_single; | 948 | un = &unf_single; |
949 | blocks_needed = 1; | 949 | blocks_needed = 1; |
950 | max_to_insert = 0; | 950 | max_to_insert = 0; |
951 | } | 951 | } |
952 | } | 952 | } |
953 | if (blocks_needed <= max_to_insert) { | 953 | if (blocks_needed <= max_to_insert) { |
954 | /* we are going to add target block to the file. Use allocated | 954 | /* we are going to add target block to the file. Use allocated |
955 | block for that */ | 955 | block for that */ |
956 | un[blocks_needed - 1] = | 956 | un[blocks_needed - 1] = |
957 | cpu_to_le32(allocated_block_nr); | 957 | cpu_to_le32(allocated_block_nr); |
958 | set_block_dev_mapped(bh_result, | 958 | set_block_dev_mapped(bh_result, |
959 | allocated_block_nr, inode); | 959 | allocated_block_nr, inode); |
960 | set_buffer_new(bh_result); | 960 | set_buffer_new(bh_result); |
961 | done = 1; | 961 | done = 1; |
962 | } else { | 962 | } else { |
963 | /* paste hole to the indirect item */ | 963 | /* paste hole to the indirect item */ |
964 | /* If kmalloc failed, max_to_insert becomes zero and it means we | 964 | /* If kmalloc failed, max_to_insert becomes zero and it means we |
965 | only have space for one block */ | 965 | only have space for one block */ |
966 | blocks_needed = | 966 | blocks_needed = |
967 | max_to_insert ? max_to_insert : 1; | 967 | max_to_insert ? max_to_insert : 1; |
968 | } | 968 | } |
969 | retval = | 969 | retval = |
970 | reiserfs_paste_into_item(th, &path, &tmp_key, inode, | 970 | reiserfs_paste_into_item(th, &path, &tmp_key, inode, |
971 | (char *)un, | 971 | (char *)un, |
972 | UNFM_P_SIZE * | 972 | UNFM_P_SIZE * |
973 | blocks_needed); | 973 | blocks_needed); |
974 | 974 | ||
975 | if (blocks_needed != 1) | 975 | if (blocks_needed != 1) |
976 | kfree(un); | 976 | kfree(un); |
977 | 977 | ||
978 | if (retval) { | 978 | if (retval) { |
979 | reiserfs_free_block(th, inode, | 979 | reiserfs_free_block(th, inode, |
980 | allocated_block_nr, 1); | 980 | allocated_block_nr, 1); |
981 | goto failure; | 981 | goto failure; |
982 | } | 982 | } |
983 | if (!done) { | 983 | if (!done) { |
984 | /* We need to mark new file size in case this function will be | 984 | /* We need to mark new file size in case this function will be |
985 | interrupted/aborted later on. And we may do this only for | 985 | interrupted/aborted later on. And we may do this only for |
986 | holes. */ | 986 | holes. */ |
987 | inode->i_size += | 987 | inode->i_size += |
988 | inode->i_sb->s_blocksize * blocks_needed; | 988 | inode->i_sb->s_blocksize * blocks_needed; |
989 | } | 989 | } |
990 | } | 990 | } |
991 | 991 | ||
992 | if (done == 1) | 992 | if (done == 1) |
993 | break; | 993 | break; |
994 | 994 | ||
995 | /* this loop could log more blocks than we had originally asked | 995 | /* this loop could log more blocks than we had originally asked |
996 | ** for. So, we have to allow the transaction to end if it is | 996 | ** for. So, we have to allow the transaction to end if it is |
997 | ** too big or too full. Update the inode so things are | 997 | ** too big or too full. Update the inode so things are |
998 | ** consistent if we crash before the function returns | 998 | ** consistent if we crash before the function returns |
999 | ** | 999 | ** |
1000 | ** release the path so that anybody waiting on the path before | 1000 | ** release the path so that anybody waiting on the path before |
1001 | ** ending their transaction will be able to continue. | 1001 | ** ending their transaction will be able to continue. |
1002 | */ | 1002 | */ |
1003 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { | 1003 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { |
1004 | retval = restart_transaction(th, inode, &path); | 1004 | retval = restart_transaction(th, inode, &path); |
1005 | if (retval) | 1005 | if (retval) |
1006 | goto failure; | 1006 | goto failure; |
1007 | } | 1007 | } |
1008 | /* | 1008 | /* |
1009 | * inserting indirect pointers for a hole can take a | 1009 | * inserting indirect pointers for a hole can take a |
1010 | * long time. reschedule if needed and also release the write | 1010 | * long time. reschedule if needed and also release the write |
1011 | * lock for others. | 1011 | * lock for others. |
1012 | */ | 1012 | */ |
1013 | if (need_resched()) { | 1013 | if (need_resched()) { |
1014 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | 1014 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
1015 | schedule(); | 1015 | schedule(); |
1016 | lock_depth = reiserfs_write_lock_once(inode->i_sb); | 1016 | lock_depth = reiserfs_write_lock_once(inode->i_sb); |
1017 | } | 1017 | } |
1018 | 1018 | ||
1019 | retval = search_for_position_by_key(inode->i_sb, &key, &path); | 1019 | retval = search_for_position_by_key(inode->i_sb, &key, &path); |
1020 | if (retval == IO_ERROR) { | 1020 | if (retval == IO_ERROR) { |
1021 | retval = -EIO; | 1021 | retval = -EIO; |
1022 | goto failure; | 1022 | goto failure; |
1023 | } | 1023 | } |
1024 | if (retval == POSITION_FOUND) { | 1024 | if (retval == POSITION_FOUND) { |
1025 | reiserfs_warning(inode->i_sb, "vs-825", | 1025 | reiserfs_warning(inode->i_sb, "vs-825", |
1026 | "%K should not be found", &key); | 1026 | "%K should not be found", &key); |
1027 | retval = -EEXIST; | 1027 | retval = -EEXIST; |
1028 | if (allocated_block_nr) | 1028 | if (allocated_block_nr) |
1029 | reiserfs_free_block(th, inode, | 1029 | reiserfs_free_block(th, inode, |
1030 | allocated_block_nr, 1); | 1030 | allocated_block_nr, 1); |
1031 | pathrelse(&path); | 1031 | pathrelse(&path); |
1032 | goto failure; | 1032 | goto failure; |
1033 | } | 1033 | } |
1034 | bh = get_last_bh(&path); | 1034 | bh = get_last_bh(&path); |
1035 | ih = get_ih(&path); | 1035 | ih = get_ih(&path); |
1036 | item = get_item(&path); | 1036 | item = get_item(&path); |
1037 | pos_in_item = path.pos_in_item; | 1037 | pos_in_item = path.pos_in_item; |
1038 | } while (1); | 1038 | } while (1); |
1039 | 1039 | ||
1040 | retval = 0; | 1040 | retval = 0; |
1041 | 1041 | ||
1042 | failure: | 1042 | failure: |
1043 | if (th && (!dangle || (retval && !th->t_trans_id))) { | 1043 | if (th && (!dangle || (retval && !th->t_trans_id))) { |
1044 | int err; | 1044 | int err; |
1045 | if (th->t_trans_id) | 1045 | if (th->t_trans_id) |
1046 | reiserfs_update_sd(th, inode); | 1046 | reiserfs_update_sd(th, inode); |
1047 | err = reiserfs_end_persistent_transaction(th); | 1047 | err = reiserfs_end_persistent_transaction(th); |
1048 | if (err) | 1048 | if (err) |
1049 | retval = err; | 1049 | retval = err; |
1050 | } | 1050 | } |
1051 | 1051 | ||
1052 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | 1052 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
1053 | reiserfs_check_path(&path); | 1053 | reiserfs_check_path(&path); |
1054 | return retval; | 1054 | return retval; |
1055 | } | 1055 | } |
1056 | 1056 | ||
1057 | static int | 1057 | static int |
1058 | reiserfs_readpages(struct file *file, struct address_space *mapping, | 1058 | reiserfs_readpages(struct file *file, struct address_space *mapping, |
1059 | struct list_head *pages, unsigned nr_pages) | 1059 | struct list_head *pages, unsigned nr_pages) |
1060 | { | 1060 | { |
1061 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); | 1061 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); |
1062 | } | 1062 | } |
1063 | 1063 | ||
1064 | /* Compute real number of used bytes by file | 1064 | /* Compute real number of used bytes by file |
1065 | * Following three functions can go away when we'll have enough space in stat item | 1065 | * Following three functions can go away when we'll have enough space in stat item |
1066 | */ | 1066 | */ |
1067 | static int real_space_diff(struct inode *inode, int sd_size) | 1067 | static int real_space_diff(struct inode *inode, int sd_size) |
1068 | { | 1068 | { |
1069 | int bytes; | 1069 | int bytes; |
1070 | loff_t blocksize = inode->i_sb->s_blocksize; | 1070 | loff_t blocksize = inode->i_sb->s_blocksize; |
1071 | 1071 | ||
1072 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) | 1072 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) |
1073 | return sd_size; | 1073 | return sd_size; |
1074 | 1074 | ||
1075 | /* End of file is also in full block with indirect reference, so round | 1075 | /* End of file is also in full block with indirect reference, so round |
1076 | ** up to the next block. | 1076 | ** up to the next block. |
1077 | ** | 1077 | ** |
1078 | ** there is just no way to know if the tail is actually packed | 1078 | ** there is just no way to know if the tail is actually packed |
1079 | ** on the file, so we have to assume it isn't. When we pack the | 1079 | ** on the file, so we have to assume it isn't. When we pack the |
1080 | ** tail, we add 4 bytes to pretend there really is an unformatted | 1080 | ** tail, we add 4 bytes to pretend there really is an unformatted |
1081 | ** node pointer | 1081 | ** node pointer |
1082 | */ | 1082 | */ |
1083 | bytes = | 1083 | bytes = |
1084 | ((inode->i_size + | 1084 | ((inode->i_size + |
1085 | (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + | 1085 | (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + |
1086 | sd_size; | 1086 | sd_size; |
1087 | return bytes; | 1087 | return bytes; |
1088 | } | 1088 | } |
1089 | 1089 | ||
1090 | static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, | 1090 | static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, |
1091 | int sd_size) | 1091 | int sd_size) |
1092 | { | 1092 | { |
1093 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { | 1093 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { |
1094 | return inode->i_size + | 1094 | return inode->i_size + |
1095 | (loff_t) (real_space_diff(inode, sd_size)); | 1095 | (loff_t) (real_space_diff(inode, sd_size)); |
1096 | } | 1096 | } |
1097 | return ((loff_t) real_space_diff(inode, sd_size)) + | 1097 | return ((loff_t) real_space_diff(inode, sd_size)) + |
1098 | (((loff_t) blocks) << 9); | 1098 | (((loff_t) blocks) << 9); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | /* Compute number of blocks used by file in ReiserFS counting */ | 1101 | /* Compute number of blocks used by file in ReiserFS counting */ |
1102 | static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) | 1102 | static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) |
1103 | { | 1103 | { |
1104 | loff_t bytes = inode_get_bytes(inode); | 1104 | loff_t bytes = inode_get_bytes(inode); |
1105 | loff_t real_space = real_space_diff(inode, sd_size); | 1105 | loff_t real_space = real_space_diff(inode, sd_size); |
1106 | 1106 | ||
1107 | /* keeps fsck and non-quota versions of reiserfs happy */ | 1107 | /* keeps fsck and non-quota versions of reiserfs happy */ |
1108 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { | 1108 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { |
1109 | bytes += (loff_t) 511; | 1109 | bytes += (loff_t) 511; |
1110 | } | 1110 | } |
1111 | 1111 | ||
1112 | /* files from before the quota patch might i_blocks such that | 1112 | /* files from before the quota patch might i_blocks such that |
1113 | ** bytes < real_space. Deal with that here to prevent it from | 1113 | ** bytes < real_space. Deal with that here to prevent it from |
1114 | ** going negative. | 1114 | ** going negative. |
1115 | */ | 1115 | */ |
1116 | if (bytes < real_space) | 1116 | if (bytes < real_space) |
1117 | return 0; | 1117 | return 0; |
1118 | return (bytes - real_space) >> 9; | 1118 | return (bytes - real_space) >> 9; |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | // | 1121 | // |
1122 | // BAD: new directories have stat data of new type and all other items | 1122 | // BAD: new directories have stat data of new type and all other items |
1123 | // of old type. Version stored in the inode says about body items, so | 1123 | // of old type. Version stored in the inode says about body items, so |
/*
 * init_inode - fill an in-core inode from the on-disk stat data item
 * that @path points at.  Both the old (v1 / 3.5) and new (v2 / 3.6)
 * stat data layouts are handled; the layout is taken from the item
 * head because in update_stat_data we can not rely on the inode, but
 * have to check the item version directly.
 *
 * Called by reiserfs_read_locked_inode.  Releases @path.
 */
static void init_inode(struct inode *inode, struct treepath *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;
	//int version = ITEM_VERSION_1;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	/* remember the key so later tree searches can find this object */
	copy_key(INODE_PKEY(inode), &(ih->ih_key));

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	reiserfs_init_xattr_rwsem(inode);

	if (stat_data_v1(ih)) {
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		inode->i_nlink = sd_v1_nlink(sd);
		inode->i_uid = sd_v1_uid(sd);
		inode->i_gid = sd_v1_gid(sd);
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		/* v1 stat data only stores whole seconds */
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
		if (inode->i_blocks > blocks) {
			// there was a bug in <=3.5.23 when i_blocks could take negative
			// values. Starting from 3.5.17 this value could even be stored in
			// stat data. For such files we set i_blocks based on file
			// size. Just 2 notes: this can be wrong for sparse files. On-disk value will be
			// only updated if file's inode will ever change
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);
		/* an early bug in the quota code can give us an odd number for the
		** block count. This is incorrect, fix it here.
		*/
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/* nopack is initially zero for v1 objects. For v2 objects,
		   nopack is initialised from sd_attrs */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		// new stat data found, but object may have old items
		// (directories and symlinks)
		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		inode->i_nlink = sd_v2_nlink(sd);
		inode->i_uid = sd_v2_uid(sd);
		inode->i_size = sd_v2_size(sd);
		inode->i_gid = sd_v2_gid(sd);
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		/* device nodes store rdev where other objects store the
		   generation number */
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		/* directories and symlinks keep 3.5 format keys even on
		   3.6 filesystems */
		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/* read persistent inode attributes from sd and initialise
		   generic inode flags from them */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}
1252 | 1252 | ||
1253 | // update new stat data with inode fields | 1253 | // update new stat data with inode fields |
1254 | static void inode2sd(void *sd, struct inode *inode, loff_t size) | 1254 | static void inode2sd(void *sd, struct inode *inode, loff_t size) |
1255 | { | 1255 | { |
1256 | struct stat_data *sd_v2 = (struct stat_data *)sd; | 1256 | struct stat_data *sd_v2 = (struct stat_data *)sd; |
1257 | __u16 flags; | 1257 | __u16 flags; |
1258 | 1258 | ||
1259 | set_sd_v2_mode(sd_v2, inode->i_mode); | 1259 | set_sd_v2_mode(sd_v2, inode->i_mode); |
1260 | set_sd_v2_nlink(sd_v2, inode->i_nlink); | 1260 | set_sd_v2_nlink(sd_v2, inode->i_nlink); |
1261 | set_sd_v2_uid(sd_v2, inode->i_uid); | 1261 | set_sd_v2_uid(sd_v2, inode->i_uid); |
1262 | set_sd_v2_size(sd_v2, size); | 1262 | set_sd_v2_size(sd_v2, size); |
1263 | set_sd_v2_gid(sd_v2, inode->i_gid); | 1263 | set_sd_v2_gid(sd_v2, inode->i_gid); |
1264 | set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); | 1264 | set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); |
1265 | set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); | 1265 | set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); |
1266 | set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); | 1266 | set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); |
1267 | set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); | 1267 | set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); |
1268 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | 1268 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) |
1269 | set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); | 1269 | set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); |
1270 | else | 1270 | else |
1271 | set_sd_v2_generation(sd_v2, inode->i_generation); | 1271 | set_sd_v2_generation(sd_v2, inode->i_generation); |
1272 | flags = REISERFS_I(inode)->i_attrs; | 1272 | flags = REISERFS_I(inode)->i_attrs; |
1273 | i_attrs_to_sd_attrs(inode, &flags); | 1273 | i_attrs_to_sd_attrs(inode, &flags); |
1274 | set_sd_v2_attrs(sd_v2, flags); | 1274 | set_sd_v2_attrs(sd_v2, flags); |
1275 | } | 1275 | } |
1276 | 1276 | ||
1277 | // used to copy inode's fields to old stat data | 1277 | // used to copy inode's fields to old stat data |
1278 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | 1278 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) |
1279 | { | 1279 | { |
1280 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; | 1280 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; |
1281 | 1281 | ||
1282 | set_sd_v1_mode(sd_v1, inode->i_mode); | 1282 | set_sd_v1_mode(sd_v1, inode->i_mode); |
1283 | set_sd_v1_uid(sd_v1, inode->i_uid); | 1283 | set_sd_v1_uid(sd_v1, inode->i_uid); |
1284 | set_sd_v1_gid(sd_v1, inode->i_gid); | 1284 | set_sd_v1_gid(sd_v1, inode->i_gid); |
1285 | set_sd_v1_nlink(sd_v1, inode->i_nlink); | 1285 | set_sd_v1_nlink(sd_v1, inode->i_nlink); |
1286 | set_sd_v1_size(sd_v1, size); | 1286 | set_sd_v1_size(sd_v1, size); |
1287 | set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); | 1287 | set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); |
1288 | set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); | 1288 | set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec); |
1289 | set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); | 1289 | set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec); |
1290 | 1290 | ||
1291 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | 1291 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) |
1292 | set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); | 1292 | set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); |
1293 | else | 1293 | else |
1294 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); | 1294 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); |
1295 | 1295 | ||
1296 | // Sigh. i_first_direct_byte is back | 1296 | // Sigh. i_first_direct_byte is back |
1297 | set_sd_v1_first_direct_byte(sd_v1, | 1297 | set_sd_v1_first_direct_byte(sd_v1, |
1298 | REISERFS_I(inode)->i_first_direct_byte); | 1298 | REISERFS_I(inode)->i_first_direct_byte); |
1299 | } | 1299 | } |
1300 | 1300 | ||
1301 | /* NOTE, you must prepare the buffer head before sending it here, | 1301 | /* NOTE, you must prepare the buffer head before sending it here, |
1302 | ** and then log it after the call | 1302 | ** and then log it after the call |
1303 | */ | 1303 | */ |
1304 | static void update_stat_data(struct treepath *path, struct inode *inode, | 1304 | static void update_stat_data(struct treepath *path, struct inode *inode, |
1305 | loff_t size) | 1305 | loff_t size) |
1306 | { | 1306 | { |
1307 | struct buffer_head *bh; | 1307 | struct buffer_head *bh; |
1308 | struct item_head *ih; | 1308 | struct item_head *ih; |
1309 | 1309 | ||
1310 | bh = PATH_PLAST_BUFFER(path); | 1310 | bh = PATH_PLAST_BUFFER(path); |
1311 | ih = PATH_PITEM_HEAD(path); | 1311 | ih = PATH_PITEM_HEAD(path); |
1312 | 1312 | ||
1313 | if (!is_statdata_le_ih(ih)) | 1313 | if (!is_statdata_le_ih(ih)) |
1314 | reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", | 1314 | reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", |
1315 | INODE_PKEY(inode), ih); | 1315 | INODE_PKEY(inode), ih); |
1316 | 1316 | ||
1317 | if (stat_data_v1(ih)) { | 1317 | if (stat_data_v1(ih)) { |
1318 | // path points to old stat data | 1318 | // path points to old stat data |
1319 | inode2sd_v1(B_I_PITEM(bh, ih), inode, size); | 1319 | inode2sd_v1(B_I_PITEM(bh, ih), inode, size); |
1320 | } else { | 1320 | } else { |
1321 | inode2sd(B_I_PITEM(bh, ih), inode, size); | 1321 | inode2sd(B_I_PITEM(bh, ih), inode, size); |
1322 | } | 1322 | } |
1323 | 1323 | ||
1324 | return; | 1324 | return; |
1325 | } | 1325 | } |
1326 | 1326 | ||
/*
 * reiserfs_update_sd_size - flush the in-core inode into its on-disk
 * stat data, recording @size instead of inode->i_size.
 *
 * Must be called from inside a transaction (BUG otherwise).  Looks up
 * the stat data item, prepares its buffer for the journal, copies the
 * fields in and marks the buffer dirty.  I/O errors and a missing stat
 * data item are logged and swallowed; a missing item on an inode with
 * i_nlink == 0 is expected (unlink path) and silently ignored.
 */
void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_error(inode->i_sb, "vs-13050",
				       "i/o failure occurred trying to "
				       "update %K stat data", &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb, "vs-13060",
					 "stat data of object %k (nlink == %d) "
					 "not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/* sigh, prepare_for_journal might schedule. When it schedules the
		** FS might change. We have to detect that, and loop back to the
		** search if the stat data item has moved
		*/
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;	/* Stat_data item has been moved after scheduling. */
		}
		break;
	}
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, th->t_super, bh);
	pathrelse(&path);
	return;
}
1388 | 1388 | ||
1389 | /* reiserfs_read_locked_inode is called to read the inode off disk, and it | 1389 | /* reiserfs_read_locked_inode is called to read the inode off disk, and it |
1390 | ** does a make_bad_inode when things go wrong. But, we need to make sure | 1390 | ** does a make_bad_inode when things go wrong. But, we need to make sure |
1391 | ** and clear the key in the private portion of the inode, otherwise a | 1391 | ** and clear the key in the private portion of the inode, otherwise a |
1392 | ** corresponding iput might try to delete whatever object the inode last | 1392 | ** corresponding iput might try to delete whatever object the inode last |
1393 | ** represented. | 1393 | ** represented. |
1394 | */ | 1394 | */ |
1395 | static void reiserfs_make_bad_inode(struct inode *inode) | 1395 | static void reiserfs_make_bad_inode(struct inode *inode) |
1396 | { | 1396 | { |
1397 | memset(INODE_PKEY(inode), 0, KEY_SIZE); | 1397 | memset(INODE_PKEY(inode), 0, KEY_SIZE); |
1398 | make_bad_inode(inode); | 1398 | make_bad_inode(inode); |
1399 | } | 1399 | } |
1400 | 1400 | ||
1401 | // | 1401 | // |
1402 | // initially this function was derived from minix or ext2's analog and | 1402 | // initially this function was derived from minix or ext2's analog and |
1403 | // evolved as the prototype did | 1403 | // evolved as the prototype did |
1404 | // | 1404 | // |
1405 | 1405 | ||
1406 | int reiserfs_init_locked_inode(struct inode *inode, void *p) | 1406 | int reiserfs_init_locked_inode(struct inode *inode, void *p) |
1407 | { | 1407 | { |
1408 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; | 1408 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; |
1409 | inode->i_ino = args->objectid; | 1409 | inode->i_ino = args->objectid; |
1410 | INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); | 1410 | INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); |
1411 | return 0; | 1411 | return 0; |
1412 | } | 1412 | } |
1413 | 1413 | ||
/* reiserfs_read_locked_inode - look for the stat data item of @inode
   in the tree and fill in the in-core inode from it.  On I/O error or
   a missing item (e.g. a stale NFS handle) the inode is turned into a
   bad inode instead. */
void reiserfs_read_locked_inode(struct inode *inode,
				struct reiserfs_iget_args *args)
{
	INITIALIZE_PATH(path_to_sd);
	struct cpu_key key;
	unsigned long dirino;
	int retval;

	dirino = args->dirid;

	/* set version 1, version 2 could be used too, because stat data
	   key is the same in both versions */
	key.version = KEY_FORMAT_3_5;
	key.on_disk_key.k_dir_id = dirino;
	key.on_disk_key.k_objectid = inode->i_ino;
	key.on_disk_key.k_offset = 0;
	key.on_disk_key.k_type = 0;

	/* look for the object's stat data */
	retval = search_item(inode->i_sb, &key, &path_to_sd);
	if (retval == IO_ERROR) {
		reiserfs_error(inode->i_sb, "vs-13070",
			       "i/o failure occurred trying to find "
			       "stat data of %K", &key);
		reiserfs_make_bad_inode(inode);
		return;
	}
	if (retval != ITEM_FOUND) {
		/* a stale NFS handle can trigger this without it being an error */
		pathrelse(&path_to_sd);
		reiserfs_make_bad_inode(inode);
		inode->i_nlink = 0;
		return;
	}

	init_inode(inode, &path_to_sd);

	/* It is possible that knfsd is trying to access inode of a file
	   that is being removed from the disk by some other thread. As we
	   update sd on unlink all that is required is to check for nlink
	   here. This bug was first found by Sizif when debugging
	   SquidNG/Butterfly, forgotten, and found again after Philippe
	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

	   More logical fix would require changes in fs/inode.c:iput() to
	   remove inode from hash-table _after_ fs cleaned disk stuff up and
	   in iget() to return NULL if I_FREEING inode is found in
	   hash-table. */
	/* Currently there is one place where it's ok to meet inode with
	   nlink==0: processing of open-unlinked and half-truncated files
	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
	if ((inode->i_nlink == 0) &&
	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
		reiserfs_warning(inode->i_sb, "vs-13075",
				 "dead inode read from disk %K. "
				 "This is likely to be race with knfsd. Ignore",
				 &key);
		reiserfs_make_bad_inode(inode);
	}

	reiserfs_check_path(&path_to_sd);	/* init inode should be relsing */

}
1479 | 1479 | ||
1480 | /** | 1480 | /** |
1481 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). | 1481 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). |
1482 | * | 1482 | * |
1483 | * @inode: inode from hash table to check | 1483 | * @inode: inode from hash table to check |
1484 | * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. | 1484 | * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. |
1485 | * | 1485 | * |
1486 | * This function is called by iget5_locked() to distinguish reiserfs inodes | 1486 | * This function is called by iget5_locked() to distinguish reiserfs inodes |
1487 | * having the same inode numbers. Such inodes can only exist due to some | 1487 | * having the same inode numbers. Such inodes can only exist due to some |
1488 | * error condition. One of them should be bad. Inodes with identical | 1488 | * error condition. One of them should be bad. Inodes with identical |
1489 | * inode numbers (objectids) are distinguished by parent directory ids. | 1489 | * inode numbers (objectids) are distinguished by parent directory ids. |
1490 | * | 1490 | * |
1491 | */ | 1491 | */ |
1492 | int reiserfs_find_actor(struct inode *inode, void *opaque) | 1492 | int reiserfs_find_actor(struct inode *inode, void *opaque) |
1493 | { | 1493 | { |
1494 | struct reiserfs_iget_args *args; | 1494 | struct reiserfs_iget_args *args; |
1495 | 1495 | ||
1496 | args = opaque; | 1496 | args = opaque; |
1497 | /* args is already in CPU order */ | 1497 | /* args is already in CPU order */ |
1498 | return (inode->i_ino == args->objectid) && | 1498 | return (inode->i_ino == args->objectid) && |
1499 | (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); | 1499 | (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); |
1500 | } | 1500 | } |
1501 | 1501 | ||
/*
 * reiserfs_iget - get the in-core inode identified by @key, reading it
 * from disk if it is not yet cached.
 *
 * Called with the reiserfs write lock held; the lock is dropped around
 * iget5_locked() and retaken afterwards.  Returns ERR_PTR(-ENOMEM) on
 * allocation failure, NULL if the inode turned out to be bad or did not
 * match @key (i/o error or stale NFS handle), the inode otherwise.
 */
struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
{
	struct inode *inode;
	struct reiserfs_iget_args args;

	args.objectid = key->on_disk_key.k_objectid;
	args.dirid = key->on_disk_key.k_dir_id;
	reiserfs_write_unlock(s);
	inode = iget5_locked(s, key->on_disk_key.k_objectid,
			     reiserfs_find_actor, reiserfs_init_locked_inode,
			     (void *)(&args));
	reiserfs_write_lock(s);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (inode->i_state & I_NEW) {
		reiserfs_read_locked_inode(inode, &args);
		unlock_new_inode(inode);
	}

	if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
		/* either due to i/o error or a stale NFS handle */
		iput(inode);
		inode = NULL;
	}
	return inode;
}
1529 | 1529 | ||
1530 | static struct dentry *reiserfs_get_dentry(struct super_block *sb, | 1530 | static struct dentry *reiserfs_get_dentry(struct super_block *sb, |
1531 | u32 objectid, u32 dir_id, u32 generation) | 1531 | u32 objectid, u32 dir_id, u32 generation) |
1532 | 1532 | ||
1533 | { | 1533 | { |
1534 | struct cpu_key key; | 1534 | struct cpu_key key; |
1535 | struct inode *inode; | 1535 | struct inode *inode; |
1536 | 1536 | ||
1537 | key.on_disk_key.k_objectid = objectid; | 1537 | key.on_disk_key.k_objectid = objectid; |
1538 | key.on_disk_key.k_dir_id = dir_id; | 1538 | key.on_disk_key.k_dir_id = dir_id; |
1539 | reiserfs_write_lock(sb); | 1539 | reiserfs_write_lock(sb); |
1540 | inode = reiserfs_iget(sb, &key); | 1540 | inode = reiserfs_iget(sb, &key); |
1541 | if (inode && !IS_ERR(inode) && generation != 0 && | 1541 | if (inode && !IS_ERR(inode) && generation != 0 && |
1542 | generation != inode->i_generation) { | 1542 | generation != inode->i_generation) { |
1543 | iput(inode); | 1543 | iput(inode); |
1544 | inode = NULL; | 1544 | inode = NULL; |
1545 | } | 1545 | } |
1546 | reiserfs_write_unlock(sb); | 1546 | reiserfs_write_unlock(sb); |
1547 | 1547 | ||
1548 | return d_obtain_alias(inode); | 1548 | return d_obtain_alias(inode); |
1549 | } | 1549 | } |
1550 | 1550 | ||
1551 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | 1551 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1552 | int fh_len, int fh_type) | 1552 | int fh_len, int fh_type) |
1553 | { | 1553 | { |
1554 | /* fhtype happens to reflect the number of u32s encoded. | 1554 | /* fhtype happens to reflect the number of u32s encoded. |
1555 | * due to a bug in earlier code, fhtype might indicate there | 1555 | * due to a bug in earlier code, fhtype might indicate there |
1556 | * are more u32s then actually fitted. | 1556 | * are more u32s then actually fitted. |
1557 | * so if fhtype seems to be more than len, reduce fhtype. | 1557 | * so if fhtype seems to be more than len, reduce fhtype. |
1558 | * Valid types are: | 1558 | * Valid types are: |
1559 | * 2 - objectid + dir_id - legacy support | 1559 | * 2 - objectid + dir_id - legacy support |
1560 | * 3 - objectid + dir_id + generation | 1560 | * 3 - objectid + dir_id + generation |
1561 | * 4 - objectid + dir_id + objectid and dirid of parent - legacy | 1561 | * 4 - objectid + dir_id + objectid and dirid of parent - legacy |
1562 | * 5 - objectid + dir_id + generation + objectid and dirid of parent | 1562 | * 5 - objectid + dir_id + generation + objectid and dirid of parent |
1563 | * 6 - as above plus generation of directory | 1563 | * 6 - as above plus generation of directory |
1564 | * 6 does not fit in NFSv2 handles | 1564 | * 6 does not fit in NFSv2 handles |
1565 | */ | 1565 | */ |
1566 | if (fh_type > fh_len) { | 1566 | if (fh_type > fh_len) { |
1567 | if (fh_type != 6 || fh_len != 5) | 1567 | if (fh_type != 6 || fh_len != 5) |
1568 | reiserfs_warning(sb, "reiserfs-13077", | 1568 | reiserfs_warning(sb, "reiserfs-13077", |
1569 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", | 1569 | "nfsd/reiserfs, fhtype=%d, len=%d - odd", |
1570 | fh_type, fh_len); | 1570 | fh_type, fh_len); |
1571 | fh_type = 5; | 1571 | fh_type = 5; |
1572 | } | 1572 | } |
1573 | 1573 | ||
1574 | return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], | 1574 | return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], |
1575 | (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); | 1575 | (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, | 1578 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
1579 | int fh_len, int fh_type) | 1579 | int fh_len, int fh_type) |
1580 | { | 1580 | { |
1581 | if (fh_type < 4) | 1581 | if (fh_type < 4) |
1582 | return NULL; | 1582 | return NULL; |
1583 | 1583 | ||
1584 | return reiserfs_get_dentry(sb, | 1584 | return reiserfs_get_dentry(sb, |
1585 | (fh_type >= 5) ? fid->raw[3] : fid->raw[2], | 1585 | (fh_type >= 5) ? fid->raw[3] : fid->raw[2], |
1586 | (fh_type >= 5) ? fid->raw[4] : fid->raw[3], | 1586 | (fh_type >= 5) ? fid->raw[4] : fid->raw[3], |
1587 | (fh_type == 6) ? fid->raw[5] : 0); | 1587 | (fh_type == 6) ? fid->raw[5] : 0); |
1588 | } | 1588 | } |
1589 | 1589 | ||
1590 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1590 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, |
1591 | int need_parent) | 1591 | int need_parent) |
1592 | { | 1592 | { |
1593 | struct inode *inode = dentry->d_inode; | 1593 | struct inode *inode = dentry->d_inode; |
1594 | int maxlen = *lenp; | 1594 | int maxlen = *lenp; |
1595 | 1595 | ||
1596 | if (need_parent && (maxlen < 5)) { | 1596 | if (need_parent && (maxlen < 5)) { |
1597 | *lenp = 5; | 1597 | *lenp = 5; |
1598 | return 255; | 1598 | return 255; |
1599 | } else if (maxlen < 3) { | 1599 | } else if (maxlen < 3) { |
1600 | *lenp = 3; | 1600 | *lenp = 3; |
1601 | return 255; | 1601 | return 255; |
1602 | } | 1602 | } |
1603 | 1603 | ||
1604 | data[0] = inode->i_ino; | 1604 | data[0] = inode->i_ino; |
1605 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1605 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1606 | data[2] = inode->i_generation; | 1606 | data[2] = inode->i_generation; |
1607 | *lenp = 3; | 1607 | *lenp = 3; |
1608 | /* no room for directory info? return what we've stored so far */ | 1608 | /* no room for directory info? return what we've stored so far */ |
1609 | if (maxlen < 5 || !need_parent) | 1609 | if (maxlen < 5 || !need_parent) |
1610 | return 3; | 1610 | return 3; |
1611 | 1611 | ||
1612 | spin_lock(&dentry->d_lock); | 1612 | spin_lock(&dentry->d_lock); |
1613 | inode = dentry->d_parent->d_inode; | 1613 | inode = dentry->d_parent->d_inode; |
1614 | data[3] = inode->i_ino; | 1614 | data[3] = inode->i_ino; |
1615 | data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1615 | data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1616 | *lenp = 5; | 1616 | *lenp = 5; |
1617 | if (maxlen >= 6) { | 1617 | if (maxlen >= 6) { |
1618 | data[5] = inode->i_generation; | 1618 | data[5] = inode->i_generation; |
1619 | *lenp = 6; | 1619 | *lenp = 6; |
1620 | } | 1620 | } |
1621 | spin_unlock(&dentry->d_lock); | 1621 | spin_unlock(&dentry->d_lock); |
1622 | return *lenp; | 1622 | return *lenp; |
1623 | } | 1623 | } |
1624 | 1624 | ||
1625 | /* looks for stat data, then copies fields to it, marks the buffer | 1625 | /* looks for stat data, then copies fields to it, marks the buffer |
1626 | containing stat data as dirty */ | 1626 | containing stat data as dirty */ |
1627 | /* reiserfs inodes are never really dirty, since the dirty inode call | 1627 | /* reiserfs inodes are never really dirty, since the dirty inode call |
1628 | ** always logs them. This call allows the VFS inode marking routines | 1628 | ** always logs them. This call allows the VFS inode marking routines |
1629 | ** to properly mark inodes for datasync and such, but only actually | 1629 | ** to properly mark inodes for datasync and such, but only actually |
1630 | ** does something when called for a synchronous update. | 1630 | ** does something when called for a synchronous update. |
1631 | */ | 1631 | */ |
1632 | int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1632 | int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1633 | { | 1633 | { |
1634 | struct reiserfs_transaction_handle th; | 1634 | struct reiserfs_transaction_handle th; |
1635 | int jbegin_count = 1; | 1635 | int jbegin_count = 1; |
1636 | 1636 | ||
1637 | if (inode->i_sb->s_flags & MS_RDONLY) | 1637 | if (inode->i_sb->s_flags & MS_RDONLY) |
1638 | return -EROFS; | 1638 | return -EROFS; |
1639 | /* memory pressure can sometimes initiate write_inode calls with sync == 1, | 1639 | /* memory pressure can sometimes initiate write_inode calls with sync == 1, |
1640 | ** these cases are just when the system needs ram, not when the | 1640 | ** these cases are just when the system needs ram, not when the |
1641 | ** inode needs to reach disk for safety, and they can safely be | 1641 | ** inode needs to reach disk for safety, and they can safely be |
1642 | ** ignored because the altered inode has already been logged. | 1642 | ** ignored because the altered inode has already been logged. |
1643 | */ | 1643 | */ |
1644 | if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { | 1644 | if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { |
1645 | reiserfs_write_lock(inode->i_sb); | 1645 | reiserfs_write_lock(inode->i_sb); |
1646 | if (!journal_begin(&th, inode->i_sb, jbegin_count)) { | 1646 | if (!journal_begin(&th, inode->i_sb, jbegin_count)) { |
1647 | reiserfs_update_sd(&th, inode); | 1647 | reiserfs_update_sd(&th, inode); |
1648 | journal_end_sync(&th, inode->i_sb, jbegin_count); | 1648 | journal_end_sync(&th, inode->i_sb, jbegin_count); |
1649 | } | 1649 | } |
1650 | reiserfs_write_unlock(inode->i_sb); | 1650 | reiserfs_write_unlock(inode->i_sb); |
1651 | } | 1651 | } |
1652 | return 0; | 1652 | return 0; |
1653 | } | 1653 | } |
1654 | 1654 | ||
1655 | /* stat data of new object is inserted already, this inserts the item | 1655 | /* stat data of new object is inserted already, this inserts the item |
1656 | containing "." and ".." entries */ | 1656 | containing "." and ".." entries */ |
static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
				  struct inode *inode,
				  struct item_head *ih, struct treepath *path,
				  struct inode *dir)
{
	struct super_block *sb = th->t_super;
	char empty_dir[EMPTY_DIR_SIZE];
	char *body = empty_dir;
	struct cpu_key key;
	int retval;

	/* caller must hold an active transaction */
	BUG_ON(!th->t_trans_id);

	/* key of the "." entry of the new directory; directory entries
	 * always use the 3.5 key format (see comment below) */
	_make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
		      TYPE_DIRENTRY, 3 /*key length */ );

	/* compose item head for new item. Directories consist of items of
	   old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
	   is done by reiserfs_new_inode */
	if (old_format_only(sb)) {
		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);

		/* body is the on-disk "." and ".." entries, pointing back
		 * at this directory and at @dir respectively */
		make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
				       ih->ih_key.k_objectid,
				       INODE_PKEY(dir)->k_dir_id,
				       INODE_PKEY(dir)->k_objectid);
	} else {
		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
				  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);

		make_empty_dir_item(body, ih->ih_key.k_dir_id,
				    ih->ih_key.k_objectid,
				    INODE_PKEY(dir)->k_dir_id,
				    INODE_PKEY(dir)->k_objectid);
	}

	/* look for place in the tree for new item */
	retval = search_item(sb, &key, path);
	if (retval == IO_ERROR) {
		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new directory");
		return -EIO;
	}
	if (retval == ITEM_FOUND) {
		/* key collision: the object already exists in the tree;
		 * release the search path before bailing out */
		pathrelse(path);
		reiserfs_warning(sb, "vs-13070",
				 "object with this key exists (%k)",
				 &(ih->ih_key));
		return -EEXIST;
	}

	/* insert item, that is empty directory item */
	return reiserfs_insert_item(th, path, &key, ih, inode, body);
}
1713 | 1713 | ||
1714 | /* stat data of object has been inserted, this inserts the item | 1714 | /* stat data of object has been inserted, this inserts the item |
1715 | containing the body of symlink */ | 1715 | containing the body of symlink */ |
static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
				struct item_head *ih,
				struct treepath *path, const char *symname,
				int item_len)
{
	struct super_block *sb = th->t_super;
	struct cpu_key key;
	int retval;

	/* caller must hold an active transaction */
	BUG_ON(!th->t_trans_id);

	/* key of the direct item that will hold the symlink body;
	 * offset 1 = first byte of the file's data */
	_make_cpu_key(&key, KEY_FORMAT_3_5,
		      le32_to_cpu(ih->ih_key.k_dir_id),
		      le32_to_cpu(ih->ih_key.k_objectid),
		      1, TYPE_DIRECT, 3 /*key length */ );

	/* item head for the body; key itself is left unset here, it was
	 * filled in by reiserfs_new_inode (same pattern as
	 * reiserfs_new_directory above) */
	make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
			  0 /*free_space */ );

	/* look for place in the tree for new item */
	retval = search_item(sb, &key, path);
	if (retval == IO_ERROR) {
		reiserfs_error(sb, "vs-13080",
			       "i/o failure occurred creating new symlink");
		return -EIO;
	}
	if (retval == ITEM_FOUND) {
		/* key collision: release the search path before bailing.
		 * NOTE(review): message id "vs-13080" is shared with the
		 * i/o-error case above (the directory twin uses a distinct
		 * "vs-13070" here) — looks like a copy-paste artifact, but
		 * the id is a grep anchor so it is left untouched. */
		pathrelse(path);
		reiserfs_warning(sb, "vs-13080",
				 "object with this key exists (%k)",
				 &(ih->ih_key));
		return -EEXIST;
	}

	/* insert item, that is body of symlink */
	return reiserfs_insert_item(th, path, &key, ih, inode, symname);
}
1753 | 1753 | ||
1754 | /* inserts the stat data into the tree, and then calls | 1754 | /* inserts the stat data into the tree, and then calls |
1755 | reiserfs_new_directory (to insert ".", ".." item if new object is | 1755 | reiserfs_new_directory (to insert ".", ".." item if new object is |
1756 | directory) or reiserfs_new_symlink (to insert symlink body if new | 1756 | directory) or reiserfs_new_symlink (to insert symlink body if new |
1757 | object is symlink) or nothing (if new object is regular file) | 1757 | object is symlink) or nothing (if new object is regular file) |
1758 | 1758 | ||
1759 | NOTE! uid and gid must already be set in the inode. If we return | 1759 | NOTE! uid and gid must already be set in the inode. If we return |
1760 | non-zero due to an error, we have to drop the quota previously allocated | 1760 | non-zero due to an error, we have to drop the quota previously allocated |
1761 | for the fresh inode. This can only be done outside a transaction, so | 1761 | for the fresh inode. This can only be done outside a transaction, so |
1762 | if we return non-zero, we also end the transaction. */ | 1762 | if we return non-zero, we also end the transaction. */ |
int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
		       struct inode *dir, int mode, const char *symname,
		       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
		          strlen (symname) for symlinks) */
		       loff_t i_size, struct dentry *dentry,
		       struct inode *inode,
		       struct reiserfs_security_handle *security)
{
	struct super_block *sb;
	struct reiserfs_iget_args args;
	INITIALIZE_PATH(path_to_key);
	struct cpu_key key;
	struct item_head ih;
	struct stat_data sd;
	int retval;
	int err;

	/* caller must have opened the transaction */
	BUG_ON(!th->t_trans_id);

	/* charge the new inode to quota first; on failure the only thing
	 * to undo is the transaction itself (out_end_trans) */
	dquot_initialize(inode);
	err = dquot_alloc_inode(inode);
	if (err)
		goto out_end_trans;
	/* refuse to create entries in a directory that has been unlinked */
	if (!dir->i_nlink) {
		err = -EPERM;
		goto out_bad_inode;
	}

	sb = dir->i_sb;

	/* item head of new item */
	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
	if (!ih.ih_key.k_objectid) {
		/* objectid map exhausted / allocation failed */
		err = -ENOMEM;
		goto out_bad_inode;
	}
	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
	if (old_format_only(sb))
		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
	else
		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
	/* hash the inode and lock it as I_NEW; reiserfs_find_actor matches
	 * on (objectid, dirid) since i_ino alone is not unique */
	if (insert_inode_locked4(inode, args.objectid,
				 reiserfs_find_actor, &args) < 0) {
		err = -EINVAL;
		goto out_bad_inode;
	}
	if (old_format_only(sb))
		/* not a perfect generation count, as object ids can be reused, but
		 ** this is as good as reiserfs can do right now.
		 ** note that the private part of inode isn't filled in yet, we have
		 ** to use the directory.
		 */
		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
	else
#if defined( USE_INODE_GENERATION_COUNTER )
		inode->i_generation =
		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
#else
		inode->i_generation = ++event;
#endif

	/* fill stat data */
	inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);

	/* uid and gid must already be set by the caller for quota init */

	/* symlink cannot be immutable or append only, right? */
	if (S_ISLNK(inode->i_mode))
		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);

	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
	inode->i_size = i_size;
	inode->i_blocks = 0;
	inode->i_bytes = 0;
	/* symlinks are stored as direct items from byte 1; everything else
	 * starts with no direct bytes */
	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	/* inherit the inheritable attribute bits from the parent dir */
	REISERFS_I(inode)->i_attrs =
	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
	reiserfs_init_xattr_rwsem(inode);

	/* key to search for correct place for new stat data */
	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
		      TYPE_STAT_DATA, 3 /*key length */ );

	/* find proper place for inserting of stat data */
	retval = search_item(sb, &key, &path_to_key);
	if (retval == IO_ERROR) {
		err = -EIO;
		goto out_bad_inode;
	}
	if (retval == ITEM_FOUND) {
		pathrelse(&path_to_key);
		err = -EEXIST;
		goto out_bad_inode;
	}
	if (old_format_only(sb)) {
		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
			pathrelse(&path_to_key);
			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
			err = -EINVAL;
			goto out_bad_inode;
		}
		inode2sd_v1(&sd, inode, inode->i_size);
	} else {
		inode2sd(&sd, inode, inode->i_size);
	}
	// store in in-core inode the key of stat data and version all
	// object items will have (directory items will have old offset
	// format, other new objects will consist of new items)
	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
	else
		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
	if (old_format_only(sb))
		set_inode_sd_version(inode, STAT_DATA_V1);
	else
		set_inode_sd_version(inode, STAT_DATA_V2);

	/* insert the stat data into the tree */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (REISERFS_I(dir)->new_packing_locality)
		th->displace_new_blocks = 1;
#endif
	retval =
	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
				 (char *)(&sd));
	if (retval) {
		err = retval;
		reiserfs_check_path(&path_to_key);
		goto out_bad_inode;
	}
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (!th->displace_new_blocks)
		REISERFS_I(dir)->new_packing_locality = 0;
#endif
	if (S_ISDIR(mode)) {
		/* insert item with "." and ".." */
		retval =
		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
	}

	if (S_ISLNK(mode)) {
		/* insert body of symlink */
		if (!old_format_only(sb))
			i_size = ROUND_UP(i_size);
		retval =
		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
					 i_size);
	}
	if (retval) {
		/* stat data is already in the tree here, so the transaction
		 * must be closed before the quota teardown (out_inserted_sd
		 * skips the dquot_free_inode/journal_end pair above it) */
		err = retval;
		reiserfs_check_path(&path_to_key);
		journal_end(th, th->t_super, th->t_blocks_allocated);
		goto out_inserted_sd;
	}

	if (reiserfs_posixacl(inode->i_sb)) {
		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
		if (retval) {
			err = retval;
			reiserfs_check_path(&path_to_key);
			journal_end(th, th->t_super, th->t_blocks_allocated);
			goto out_inserted_sd;
		}
	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
		reiserfs_warning(inode->i_sb, "jdm-13090",
				 "ACLs aren't enabled in the fs, "
				 "but vfs thinks they are!");
	} else if (IS_PRIVATE(dir))
		inode->i_flags |= S_PRIVATE;

	if (security->name) {
		retval = reiserfs_security_write(th, inode, security);
		if (retval) {
			err = retval;
			reiserfs_check_path(&path_to_key);
			/* journal_end itself can fail; its error wins */
			retval = journal_end(th, th->t_super,
					     th->t_blocks_allocated);
			if (retval)
				err = retval;
			goto out_inserted_sd;
		}
	}

	reiserfs_update_sd(th, inode);
	reiserfs_check_path(&path_to_key);

	return 0;

/* it looks like you can easily compress these two goto targets into
 * one. Keeping it like this doesn't actually hurt anything, and they
 * are place holders for what the quota code actually needs.
 */
      out_bad_inode:
	/* Invalidate the object, nothing was inserted yet */
	INODE_PKEY(inode)->k_objectid = 0;

	/* Quota change must be inside a transaction for journaling */
	dquot_free_inode(inode);

      out_end_trans:
	journal_end(th, th->t_super, th->t_blocks_allocated);
	/* Drop can be outside and it needs more credits so it's better to have it outside */
	dquot_drop(inode);
	inode->i_flags |= S_NOQUOTA;
	make_bad_inode(inode);

      out_inserted_sd:
	inode->i_nlink = 0;
	th->t_trans_id = 0;	/* so the caller can't use this handle later */
	unlock_new_inode(inode);	/* OK to do even if we hadn't locked it */
	iput(inode);
	return err;
}
1991 | 1991 | ||
/*
** finds the tail page in the page cache,
** reads the last block in.
**
** On success, page_result is set to a locked, pinned page, and bh_result
** is set to an up to date buffer for the last block in the file. returns 0.
**
** tail conversion is not done, so bh_result might not be valid for writing
** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
** trying to write the block.
**
** on failure, nonzero is returned, page_result and bh_result are untouched.
*/
static int grab_tail_page(struct inode *inode,
			  struct page **page_result,
			  struct buffer_head **bh_result)
{

	/* we want the page with the last byte in the file,
	** not the page that will hold the next byte for appending
	*/
	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
	unsigned long pos = 0;
	unsigned long start = 0;
	unsigned long blocksize = inode->i_sb->s_blocksize;
	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int error;

	/* we know that we are only called with inode->i_size > 0.
	** we also know that a file tail can never be as big as a block
	** If i_size % blocksize == 0, our file is currently block aligned
	** and it won't need converting or zeroing after a truncate.
	*/
	if ((offset & (blocksize - 1)) == 0) {
		return -ENOENT;
	}
	/* find-or-create the page covering the last byte; returned locked
	 * and with an elevated refcount */
	page = grab_cache_page(inode->i_mapping, index);
	error = -ENOMEM;
	if (!page) {
		goto out;
	}
	/* start within the page of the last block in the file */
	start = (offset / blocksize) * blocksize;

	/* map/read the tail block; reiserfs_get_block_create_0 never
	 * allocates, so this only brings existing data up to date */
	error = __block_write_begin(page, start, offset - start,
				    reiserfs_get_block_create_0);
	if (error)
		goto unlock;

	/* walk the page's buffer ring to the buffer that begins at 'start' */
	head = page_buffers(page);
	bh = head;
	do {
		if (pos >= start) {
			break;
		}
		bh = bh->b_this_page;
		pos += blocksize;
	} while (bh != head);

	if (!buffer_uptodate(bh)) {
		/* note, this should never happen, prepare_write should
		** be taking care of this for us. If the buffer isn't up to date,
		** I've screwed up the code to find the buffer, or the code to
		** call prepare_write
		*/
		reiserfs_error(inode->i_sb, "clm-6000",
			       "error reading block %lu", bh->b_blocknr);
		error = -EIO;
		goto unlock;
	}
	/* success: hand the still-locked, pinned page and its tail buffer
	 * to the caller; error is 0 here (from __block_write_begin) */
	*bh_result = bh;
	*page_result = page;

out:
	return error;

unlock:
	/* failure after the page was grabbed: drop the lock and the pin */
	unlock_page(page);
	page_cache_release(page);
	return error;
}
2076 | 2076 | ||
/*
** vfs version of truncate file. Must NOT be called with
** a transaction already started.
**
** some code taken from block_truncate_page
*/
int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
{
	struct reiserfs_transaction_handle th;
	/* we want the offset for the first byte after the end of the file */
	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize = inode->i_sb->s_blocksize;
	unsigned length;
	struct page *page = NULL;
	int error;
	struct buffer_head *bh = NULL;
	int err2;
	int lock_depth;

	/* take the per-sb write lock; _once variant records the previous
	 * depth so nested acquisition is handled on unlock */
	lock_depth = reiserfs_write_lock_once(inode->i_sb);

	if (inode->i_size > 0) {
		error = grab_tail_page(inode, &page, &bh);
		if (error) {
			// -ENOENT means we truncated past the end of the file,
			// and get_block_create_0 could not find a block to read in,
			// which is ok.
			if (error != -ENOENT)
				reiserfs_error(inode->i_sb, "clm-6001",
					       "grab_tail_page failed %d",
					       error);
			/* proceed without a tail page; zeroing is skipped */
			page = NULL;
			bh = NULL;
		}
	}

	/* so, if page != NULL, we have a buffer head for the offset at
	** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
	** then we have an unformatted node. Otherwise, we have a direct item,
	** and no zeroing is required on disk. We zero after the truncate,
	** because the truncate might pack the item anyway
	** (it will unmap bh if it packs).
	*/
	/* it is enough to reserve space in transaction for 2 balancings:
	   one for "save" link adding and another for the first
	   cut_from_item. 1 is for update_sd */
	error = journal_begin(&th, inode->i_sb,
			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (error)
		goto out;
	reiserfs_update_inode_transaction(inode);
	if (update_timestamps)
		/* we are doing real truncate: if the system crashes before the last
		   transaction of truncating gets committed - on reboot the file
		   either appears truncated properly or not truncated at all */
		/* NOTE(review): add_save_link's return value is ignored here;
		 * a failure would leave the truncate without crash protection.
		 * Verify whether it should be checked like remove_save_link. */
		add_save_link(&th, inode, 1);
	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
	error =
	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
	if (error)
		goto out;

	/* check reiserfs_do_truncate after ending the transaction */
	if (err2) {
		error = err2;
		goto out;
	}

	if (update_timestamps) {
		error = remove_save_link(inode, 1 /* truncate */);
		if (error)
			goto out;
	}

	if (page) {
		length = offset & (blocksize - 1);
		/* if we are not on a block boundary */
		if (length) {
			/* zero from the new EOF to the end of its block */
			length = blocksize - length;
			zero_user(page, offset, length);
			/* only a real unformatted node needs the zeroed
			 * range written back; a direct item does not */
			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
				mark_buffer_dirty(bh);
			}
		}
		unlock_page(page);
		page_cache_release(page);
	}

	reiserfs_write_unlock_once(inode->i_sb, lock_depth);

	return 0;
out:
	/* error path: release the tail page (if grabbed) and the sb lock */
	if (page) {
		unlock_page(page);
		page_cache_release(page);
	}

	reiserfs_write_unlock_once(inode->i_sb, lock_depth);

	return error;
}
2178 | 2178 | ||
/*
 * Map one page-sized buffer for writepage. For an unformatted node the
 * buffer is simply mapped to its on-disk block; for a direct item (tail)
 * the page contents are copied back into the item under a transaction and
 * the buffer is left mapped with b_blocknr == 0. Holes fall back to
 * reiserfs_get_block for allocation. Returns 0 on success or -errno.
 */
static int map_block_for_writepage(struct inode *inode,
				   struct buffer_head *bh_result,
				   unsigned long block)
{
	struct reiserfs_transaction_handle th;
	int fs_gen;
	struct item_head tmp_ih;
	struct item_head *ih;
	struct buffer_head *bh;
	__le32 *item;
	struct cpu_key key;
	INITIALIZE_PATH(path);
	int pos_in_item;
	int jbegin_count = JOURNAL_PER_BALANCE_CNT;
	/* reiserfs keys are 1-based byte offsets, hence the +1 */
	loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
	int retval;
	int use_get_block = 0;
	int bytes_copied = 0;
	int copy_size;
	int trans_running = 0;

	/* catch places below that try to log something without starting a trans */
	th.t_trans_id = 0;

	if (!buffer_uptodate(bh_result)) {
		return -EIO;
	}

	/* keep the page mapped across the whole search/copy sequence */
	kmap(bh_result->b_page);
	start_over:
	reiserfs_write_lock(inode->i_sb);
	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);

      research:
	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval != POSITION_FOUND) {
		/* nothing at this offset: treat as a hole and allocate below */
		use_get_block = 1;
		goto out;
	}

	bh = get_last_bh(&path);
	ih = get_ih(&path);
	item = get_item(&path);
	pos_in_item = path.pos_in_item;

	/* we've found an unformatted node */
	if (indirect_item_found(retval, ih)) {
		if (bytes_copied > 0) {
			reiserfs_warning(inode->i_sb, "clm-6002",
					 "bytes_copied %d", bytes_copied);
		}
		if (!get_block_num(item, pos_in_item)) {
			/* crap, we are writing to a hole */
			use_get_block = 1;
			goto out;
		}
		set_block_dev_mapped(bh_result,
				     get_block_num(item, pos_in_item), inode);
	} else if (is_direct_le_ih(ih)) {
		/* tail (direct item): copy the page data back into the item */
		char *p;
		p = page_address(bh_result->b_page);
		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
		copy_size = ih_item_len(ih) - pos_in_item;

		/* snapshot the fs generation and item head so a concurrent
		 * tree rebalance can be detected after blocking calls */
		fs_gen = get_generation(inode->i_sb);
		copy_item_head(&tmp_ih, ih);

		if (!trans_running) {
			/* vs-3050 is gone, no need to drop the path */
			retval = journal_begin(&th, inode->i_sb, jbegin_count);
			if (retval)
				goto out;
			reiserfs_update_inode_transaction(inode);
			trans_running = 1;
			/* journal_begin may have slept; redo the search if
			 * the item moved meanwhile */
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
		}

		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);

		/* prepare_for_journal may also have slept: re-check */
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			goto research;
		}

		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
		       copy_size);

		journal_mark_dirty(&th, inode->i_sb, bh);
		bytes_copied += copy_size;
		/* mapped with b_blocknr == 0 marks "data lives in the tree,
		 * not in a disk block" for the writepage caller */
		set_block_dev_mapped(bh_result, 0, inode);

		/* are there still bytes left? */
		if (bytes_copied < bh_result->b_size &&
		    (byte_offset + bytes_copied) < inode->i_size) {
			/* the tail spans more than one item: advance the key
			 * and copy the rest */
			set_cpu_key_k_offset(&key,
					     cpu_key_k_offset(&key) +
					     copy_size);
			goto research;
		}
	} else {
		reiserfs_warning(inode->i_sb, "clm-6003",
				 "bad item inode %lu", inode->i_ino);
		retval = -EIO;
		goto out;
	}
	retval = 0;

      out:
	pathrelse(&path);
	if (trans_running) {
		int err = journal_end(&th, inode->i_sb, jbegin_count);
		if (err)
			retval = err;
		trans_running = 0;
	}
	reiserfs_write_unlock(inode->i_sb);

	/* this is where we fill in holes in the file. */
	if (use_get_block) {
		retval = reiserfs_get_block(inode, block, bh_result,
					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
					    | GET_BLOCK_NO_DANGLE);
		if (!retval) {
			if (!buffer_mapped(bh_result)
			    || bh_result->b_blocknr == 0) {
				/* get_block failed to find a mapped unformatted node. */
				/* allocation produced a tail: retry the tree
				 * search so the direct-item path copies it */
				use_get_block = 0;
				goto start_over;
			}
		}
	}
	kunmap(bh_result->b_page);

	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* we've copied data from the page into the direct item, so the
		 * buffer in the page is now clean, mark it to reflect that.
		 */
		lock_buffer(bh_result);
		clear_buffer_dirty(bh_result);
		unlock_buffer(bh_result);
	}
	return retval;
}
2328 | 2328 | ||
2329 | /* | 2329 | /* |
2330 | * mason@suse.com: updated in 2.5.54 to follow the same general io | 2330 | * mason@suse.com: updated in 2.5.54 to follow the same general io |
2331 | * start/recovery path as __block_write_full_page, along with special | 2331 | * start/recovery path as __block_write_full_page, along with special |
2332 | * code to handle reiserfs tails. | 2332 | * code to handle reiserfs tails. |
2333 | */ | 2333 | */ |
2334 | static int reiserfs_write_full_page(struct page *page, | 2334 | static int reiserfs_write_full_page(struct page *page, |
2335 | struct writeback_control *wbc) | 2335 | struct writeback_control *wbc) |
2336 | { | 2336 | { |
2337 | struct inode *inode = page->mapping->host; | 2337 | struct inode *inode = page->mapping->host; |
2338 | unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; | 2338 | unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; |
2339 | int error = 0; | 2339 | int error = 0; |
2340 | unsigned long block; | 2340 | unsigned long block; |
2341 | sector_t last_block; | 2341 | sector_t last_block; |
2342 | struct buffer_head *head, *bh; | 2342 | struct buffer_head *head, *bh; |
2343 | int partial = 0; | 2343 | int partial = 0; |
2344 | int nr = 0; | 2344 | int nr = 0; |
2345 | int checked = PageChecked(page); | 2345 | int checked = PageChecked(page); |
2346 | struct reiserfs_transaction_handle th; | 2346 | struct reiserfs_transaction_handle th; |
2347 | struct super_block *s = inode->i_sb; | 2347 | struct super_block *s = inode->i_sb; |
2348 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; | 2348 | int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; |
2349 | th.t_trans_id = 0; | 2349 | th.t_trans_id = 0; |
2350 | 2350 | ||
2351 | /* no logging allowed when nonblocking or from PF_MEMALLOC */ | 2351 | /* no logging allowed when nonblocking or from PF_MEMALLOC */ |
2352 | if (checked && (current->flags & PF_MEMALLOC)) { | 2352 | if (checked && (current->flags & PF_MEMALLOC)) { |
2353 | redirty_page_for_writepage(wbc, page); | 2353 | redirty_page_for_writepage(wbc, page); |
2354 | unlock_page(page); | 2354 | unlock_page(page); |
2355 | return 0; | 2355 | return 0; |
2356 | } | 2356 | } |
2357 | 2357 | ||
2358 | /* The page dirty bit is cleared before writepage is called, which | 2358 | /* The page dirty bit is cleared before writepage is called, which |
2359 | * means we have to tell create_empty_buffers to make dirty buffers | 2359 | * means we have to tell create_empty_buffers to make dirty buffers |
2360 | * The page really should be up to date at this point, so tossing | 2360 | * The page really should be up to date at this point, so tossing |
2361 | * in the BH_Uptodate is just a sanity check. | 2361 | * in the BH_Uptodate is just a sanity check. |
2362 | */ | 2362 | */ |
2363 | if (!page_has_buffers(page)) { | 2363 | if (!page_has_buffers(page)) { |
2364 | create_empty_buffers(page, s->s_blocksize, | 2364 | create_empty_buffers(page, s->s_blocksize, |
2365 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | 2365 | (1 << BH_Dirty) | (1 << BH_Uptodate)); |
2366 | } | 2366 | } |
2367 | head = page_buffers(page); | 2367 | head = page_buffers(page); |
2368 | 2368 | ||
2369 | /* last page in the file, zero out any contents past the | 2369 | /* last page in the file, zero out any contents past the |
2370 | ** last byte in the file | 2370 | ** last byte in the file |
2371 | */ | 2371 | */ |
2372 | if (page->index >= end_index) { | 2372 | if (page->index >= end_index) { |
2373 | unsigned last_offset; | 2373 | unsigned last_offset; |
2374 | 2374 | ||
2375 | last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | 2375 | last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1); |
2376 | /* no file contents in this page */ | 2376 | /* no file contents in this page */ |
2377 | if (page->index >= end_index + 1 || !last_offset) { | 2377 | if (page->index >= end_index + 1 || !last_offset) { |
2378 | unlock_page(page); | 2378 | unlock_page(page); |
2379 | return 0; | 2379 | return 0; |
2380 | } | 2380 | } |
2381 | zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); | 2381 | zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); |
2382 | } | 2382 | } |
2383 | bh = head; | 2383 | bh = head; |
2384 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); | 2384 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); |
2385 | last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; | 2385 | last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; |
2386 | /* first map all the buffers, logging any direct items we find */ | 2386 | /* first map all the buffers, logging any direct items we find */ |
2387 | do { | 2387 | do { |
2388 | if (block > last_block) { | 2388 | if (block > last_block) { |
2389 | /* | 2389 | /* |
2390 | * This can happen when the block size is less than | 2390 | * This can happen when the block size is less than |
2391 | * the page size. The corresponding bytes in the page | 2391 | * the page size. The corresponding bytes in the page |
2392 | * were zero filled above | 2392 | * were zero filled above |
2393 | */ | 2393 | */ |
2394 | clear_buffer_dirty(bh); | 2394 | clear_buffer_dirty(bh); |
2395 | set_buffer_uptodate(bh); | 2395 | set_buffer_uptodate(bh); |
2396 | } else if ((checked || buffer_dirty(bh)) && | 2396 | } else if ((checked || buffer_dirty(bh)) && |
2397 | (!buffer_mapped(bh) || (buffer_mapped(bh) | 2397 | (!buffer_mapped(bh) || (buffer_mapped(bh) |
2398 | && bh->b_blocknr == | 2398 | && bh->b_blocknr == |
2399 | 0))) { | 2399 | 0))) { |
2400 | /* not mapped yet, or it points to a direct item, search | 2400 | /* not mapped yet, or it points to a direct item, search |
2401 | * the btree for the mapping info, and log any direct | 2401 | * the btree for the mapping info, and log any direct |
2402 | * items found | 2402 | * items found |
2403 | */ | 2403 | */ |
2404 | if ((error = map_block_for_writepage(inode, bh, block))) { | 2404 | if ((error = map_block_for_writepage(inode, bh, block))) { |
2405 | goto fail; | 2405 | goto fail; |
2406 | } | 2406 | } |
2407 | } | 2407 | } |
2408 | bh = bh->b_this_page; | 2408 | bh = bh->b_this_page; |
2409 | block++; | 2409 | block++; |
2410 | } while (bh != head); | 2410 | } while (bh != head); |
2411 | 2411 | ||
2412 | /* | 2412 | /* |
2413 | * we start the transaction after map_block_for_writepage, | 2413 | * we start the transaction after map_block_for_writepage, |
2414 | * because it can create holes in the file (an unbounded operation). | 2414 | * because it can create holes in the file (an unbounded operation). |
2415 | * starting it here, we can make a reliable estimate for how many | 2415 | * starting it here, we can make a reliable estimate for how many |
2416 | * blocks we're going to log | 2416 | * blocks we're going to log |
2417 | */ | 2417 | */ |
2418 | if (checked) { | 2418 | if (checked) { |
2419 | ClearPageChecked(page); | 2419 | ClearPageChecked(page); |
2420 | reiserfs_write_lock(s); | 2420 | reiserfs_write_lock(s); |
2421 | error = journal_begin(&th, s, bh_per_page + 1); | 2421 | error = journal_begin(&th, s, bh_per_page + 1); |
2422 | if (error) { | 2422 | if (error) { |
2423 | reiserfs_write_unlock(s); | 2423 | reiserfs_write_unlock(s); |
2424 | goto fail; | 2424 | goto fail; |
2425 | } | 2425 | } |
2426 | reiserfs_update_inode_transaction(inode); | 2426 | reiserfs_update_inode_transaction(inode); |
2427 | } | 2427 | } |
2428 | /* now go through and lock any dirty buffers on the page */ | 2428 | /* now go through and lock any dirty buffers on the page */ |
2429 | do { | 2429 | do { |
2430 | get_bh(bh); | 2430 | get_bh(bh); |
2431 | if (!buffer_mapped(bh)) | 2431 | if (!buffer_mapped(bh)) |
2432 | continue; | 2432 | continue; |
2433 | if (buffer_mapped(bh) && bh->b_blocknr == 0) | 2433 | if (buffer_mapped(bh) && bh->b_blocknr == 0) |
2434 | continue; | 2434 | continue; |
2435 | 2435 | ||
2436 | if (checked) { | 2436 | if (checked) { |
2437 | reiserfs_prepare_for_journal(s, bh, 1); | 2437 | reiserfs_prepare_for_journal(s, bh, 1); |
2438 | journal_mark_dirty(&th, s, bh); | 2438 | journal_mark_dirty(&th, s, bh); |
2439 | continue; | 2439 | continue; |
2440 | } | 2440 | } |
2441 | /* from this point on, we know the buffer is mapped to a | 2441 | /* from this point on, we know the buffer is mapped to a |
2442 | * real block and not a direct item | 2442 | * real block and not a direct item |
2443 | */ | 2443 | */ |
2444 | if (wbc->sync_mode != WB_SYNC_NONE) { | 2444 | if (wbc->sync_mode != WB_SYNC_NONE) { |
2445 | lock_buffer(bh); | 2445 | lock_buffer(bh); |
2446 | } else { | 2446 | } else { |
2447 | if (!trylock_buffer(bh)) { | 2447 | if (!trylock_buffer(bh)) { |
2448 | redirty_page_for_writepage(wbc, page); | 2448 | redirty_page_for_writepage(wbc, page); |
2449 | continue; | 2449 | continue; |
2450 | } | 2450 | } |
2451 | } | 2451 | } |
2452 | if (test_clear_buffer_dirty(bh)) { | 2452 | if (test_clear_buffer_dirty(bh)) { |
2453 | mark_buffer_async_write(bh); | 2453 | mark_buffer_async_write(bh); |
2454 | } else { | 2454 | } else { |
2455 | unlock_buffer(bh); | 2455 | unlock_buffer(bh); |
2456 | } | 2456 | } |
2457 | } while ((bh = bh->b_this_page) != head); | 2457 | } while ((bh = bh->b_this_page) != head); |
2458 | 2458 | ||
2459 | if (checked) { | 2459 | if (checked) { |
2460 | error = journal_end(&th, s, bh_per_page + 1); | 2460 | error = journal_end(&th, s, bh_per_page + 1); |
2461 | reiserfs_write_unlock(s); | 2461 | reiserfs_write_unlock(s); |
2462 | if (error) | 2462 | if (error) |
2463 | goto fail; | 2463 | goto fail; |
2464 | } | 2464 | } |
2465 | BUG_ON(PageWriteback(page)); | 2465 | BUG_ON(PageWriteback(page)); |
2466 | set_page_writeback(page); | 2466 | set_page_writeback(page); |
2467 | unlock_page(page); | 2467 | unlock_page(page); |
2468 | 2468 | ||
2469 | /* | 2469 | /* |
2470 | * since any buffer might be the only dirty buffer on the page, | 2470 | * since any buffer might be the only dirty buffer on the page, |
2471 | * the first submit_bh can bring the page out of writeback. | 2471 | * the first submit_bh can bring the page out of writeback. |
2472 | * be careful with the buffers. | 2472 | * be careful with the buffers. |
2473 | */ | 2473 | */ |
2474 | do { | 2474 | do { |
2475 | struct buffer_head *next = bh->b_this_page; | 2475 | struct buffer_head *next = bh->b_this_page; |
2476 | if (buffer_async_write(bh)) { | 2476 | if (buffer_async_write(bh)) { |
2477 | submit_bh(WRITE, bh); | 2477 | submit_bh(WRITE, bh); |
2478 | nr++; | 2478 | nr++; |
2479 | } | 2479 | } |
2480 | put_bh(bh); | 2480 | put_bh(bh); |
2481 | bh = next; | 2481 | bh = next; |
2482 | } while (bh != head); | 2482 | } while (bh != head); |
2483 | 2483 | ||
2484 | error = 0; | 2484 | error = 0; |
2485 | done: | 2485 | done: |
2486 | if (nr == 0) { | 2486 | if (nr == 0) { |
2487 | /* | 2487 | /* |
2488 | * if this page only had a direct item, it is very possible for | 2488 | * if this page only had a direct item, it is very possible for |
2489 | * no io to be required without there being an error. Or, | 2489 | * no io to be required without there being an error. Or, |
2490 | * someone else could have locked them and sent them down the | 2490 | * someone else could have locked them and sent them down the |
2491 | * pipe without locking the page | 2491 | * pipe without locking the page |
2492 | */ | 2492 | */ |
2493 | bh = head; | 2493 | bh = head; |
2494 | do { | 2494 | do { |
2495 | if (!buffer_uptodate(bh)) { | 2495 | if (!buffer_uptodate(bh)) { |
2496 | partial = 1; | 2496 | partial = 1; |
2497 | break; | 2497 | break; |
2498 | } | 2498 | } |
2499 | bh = bh->b_this_page; | 2499 | bh = bh->b_this_page; |
2500 | } while (bh != head); | 2500 | } while (bh != head); |
2501 | if (!partial) | 2501 | if (!partial) |
2502 | SetPageUptodate(page); | 2502 | SetPageUptodate(page); |
2503 | end_page_writeback(page); | 2503 | end_page_writeback(page); |
2504 | } | 2504 | } |
2505 | return error; | 2505 | return error; |
2506 | 2506 | ||
2507 | fail: | 2507 | fail: |
2508 | /* catches various errors, we need to make sure any valid dirty blocks | 2508 | /* catches various errors, we need to make sure any valid dirty blocks |
2509 | * get to the media. The page is currently locked and not marked for | 2509 | * get to the media. The page is currently locked and not marked for |
2510 | * writeback | 2510 | * writeback |
2511 | */ | 2511 | */ |
2512 | ClearPageUptodate(page); | 2512 | ClearPageUptodate(page); |
2513 | bh = head; | 2513 | bh = head; |
2514 | do { | 2514 | do { |
2515 | get_bh(bh); | 2515 | get_bh(bh); |
2516 | if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { | 2516 | if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { |
2517 | lock_buffer(bh); | 2517 | lock_buffer(bh); |
2518 | mark_buffer_async_write(bh); | 2518 | mark_buffer_async_write(bh); |
2519 | } else { | 2519 | } else { |
2520 | /* | 2520 | /* |
2521 | * clear any dirty bits that might have come from getting | 2521 | * clear any dirty bits that might have come from getting |
2522 | * attached to a dirty page | 2522 | * attached to a dirty page |
2523 | */ | 2523 | */ |
2524 | clear_buffer_dirty(bh); | 2524 | clear_buffer_dirty(bh); |
2525 | } | 2525 | } |
2526 | bh = bh->b_this_page; | 2526 | bh = bh->b_this_page; |
2527 | } while (bh != head); | 2527 | } while (bh != head); |
2528 | SetPageError(page); | 2528 | SetPageError(page); |
2529 | BUG_ON(PageWriteback(page)); | 2529 | BUG_ON(PageWriteback(page)); |
2530 | set_page_writeback(page); | 2530 | set_page_writeback(page); |
2531 | unlock_page(page); | 2531 | unlock_page(page); |
2532 | do { | 2532 | do { |
2533 | struct buffer_head *next = bh->b_this_page; | 2533 | struct buffer_head *next = bh->b_this_page; |
2534 | if (buffer_async_write(bh)) { | 2534 | if (buffer_async_write(bh)) { |
2535 | clear_buffer_dirty(bh); | 2535 | clear_buffer_dirty(bh); |
2536 | submit_bh(WRITE, bh); | 2536 | submit_bh(WRITE, bh); |
2537 | nr++; | 2537 | nr++; |
2538 | } | 2538 | } |
2539 | put_bh(bh); | 2539 | put_bh(bh); |
2540 | bh = next; | 2540 | bh = next; |
2541 | } while (bh != head); | 2541 | } while (bh != head); |
2542 | goto done; | 2542 | goto done; |
2543 | } | 2543 | } |
2544 | 2544 | ||
2545 | static int reiserfs_readpage(struct file *f, struct page *page) | 2545 | static int reiserfs_readpage(struct file *f, struct page *page) |
2546 | { | 2546 | { |
2547 | return block_read_full_page(page, reiserfs_get_block); | 2547 | return block_read_full_page(page, reiserfs_get_block); |
2548 | } | 2548 | } |
2549 | 2549 | ||
2550 | static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) | 2550 | static int reiserfs_writepage(struct page *page, struct writeback_control *wbc) |
2551 | { | 2551 | { |
2552 | struct inode *inode = page->mapping->host; | 2552 | struct inode *inode = page->mapping->host; |
2553 | reiserfs_wait_on_write_block(inode->i_sb); | 2553 | reiserfs_wait_on_write_block(inode->i_sb); |
2554 | return reiserfs_write_full_page(page, wbc); | 2554 | return reiserfs_write_full_page(page, wbc); |
2555 | } | 2555 | } |
2556 | 2556 | ||
2557 | static void reiserfs_truncate_failed_write(struct inode *inode) | 2557 | static void reiserfs_truncate_failed_write(struct inode *inode) |
2558 | { | 2558 | { |
2559 | truncate_inode_pages(inode->i_mapping, inode->i_size); | 2559 | truncate_inode_pages(inode->i_mapping, inode->i_size); |
2560 | reiserfs_truncate_file(inode, 0); | 2560 | reiserfs_truncate_file(inode, 0); |
2561 | } | 2561 | } |
2562 | 2562 | ||
/*
 * reiserfs ->write_begin: grab and prepare the page that will receive a
 * write of @len bytes at @pos.  reiserfs_get_block may start (or nest
 * into) a transaction while mapping blocks, so an error here must
 * unwind that transaction and truncate any blocks allocated before the
 * failure.  On success *pagep holds the locked page.
 */
static int reiserfs_write_begin(struct file *file,
				struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	struct inode *inode;
	struct page *page;
	pgoff_t index;
	int ret;
	int old_ref = 0;	/* refcount of a pre-existing transaction, if any */

	inode = mapping->host;
	*fsdata = 0;
	if (flags & AOP_FLAG_CONT_EXPAND &&
	    (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
		/* expanding truncate landing exactly on a block boundary:
		 * bump pos so the new block still gets prepared, and stash
		 * the flags in *fsdata so write_end can undo the bump */
		pos ++;
		*fsdata = (void *)(unsigned long)flags;
	}

	index = pos >> PAGE_CACHE_SHIFT;
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	reiserfs_wait_on_write_block(inode->i_sb);
	fix_tail_page_for_writing(page);
	if (reiserfs_transaction_running(inode->i_sb)) {
		/* nest into the transaction already running in this task;
		 * remember its old refcount so the error path below can
		 * tell a nested handle from a fresh persistent one */
		struct reiserfs_transaction_handle *th;
		th = (struct reiserfs_transaction_handle *)current->
		    journal_info;
		BUG_ON(!th->t_refcount);
		BUG_ON(!th->t_trans_id);
		old_ref = th->t_refcount;
		th->t_refcount++;
	}
	ret = __block_write_begin(page, pos, len, reiserfs_get_block);
	if (ret && reiserfs_transaction_running(inode->i_sb)) {
		struct reiserfs_transaction_handle *th = current->journal_info;
		/* this gets a little ugly. If reiserfs_get_block returned an
		 * error and left a transacstion running, we've got to close it,
		 * and we've got to free handle if it was a persistent transaction.
		 *
		 * But, if we had nested into an existing transaction, we need
		 * to just drop the ref count on the handle.
		 *
		 * If old_ref == 0, the transaction is from reiserfs_get_block,
		 * and it was a persistent trans. Otherwise, it was nested above.
		 */
		if (th->t_refcount > old_ref) {
			if (old_ref)
				th->t_refcount--;
			else {
				int err;
				reiserfs_write_lock(inode->i_sb);
				err = reiserfs_end_persistent_transaction(th);
				reiserfs_write_unlock(inode->i_sb);
				if (err)
					ret = err;
			}
		}
	}
	if (ret) {
		unlock_page(page);
		page_cache_release(page);
		/* Truncate allocated blocks */
		reiserfs_truncate_failed_write(inode);
	}
	return ret;
}
2633 | 2633 | ||
/*
 * __reiserfs_write_begin - prepare a locked page for writing [from, from+len)
 *
 * Like reiserfs_write_begin() but for callers that already hold the page
 * locked and the reiserfs write lock.  The write lock is dropped around
 * reiserfs_wait_on_write_block() so the journal can make progress while
 * we sleep, then retaken.
 */
int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
{
	struct inode *inode = page->mapping->host;
	int ret;
	int old_ref = 0;	/* refcount of a pre-existing transaction, if any */

	reiserfs_write_unlock(inode->i_sb);
	reiserfs_wait_on_write_block(inode->i_sb);
	reiserfs_write_lock(inode->i_sb);

	fix_tail_page_for_writing(page);
	if (reiserfs_transaction_running(inode->i_sb)) {
		/* nest into the transaction already running in this task;
		 * remember its old refcount so the error path below can
		 * tell a nested handle from a fresh persistent one */
		struct reiserfs_transaction_handle *th;
		th = (struct reiserfs_transaction_handle *)current->
		    journal_info;
		BUG_ON(!th->t_refcount);
		BUG_ON(!th->t_trans_id);
		old_ref = th->t_refcount;
		th->t_refcount++;
	}

	ret = __block_write_begin(page, from, len, reiserfs_get_block);
	if (ret && reiserfs_transaction_running(inode->i_sb)) {
		struct reiserfs_transaction_handle *th = current->journal_info;
		/* this gets a little ugly. If reiserfs_get_block returned an
		 * error and left a transacstion running, we've got to close it,
		 * and we've got to free handle if it was a persistent transaction.
		 *
		 * But, if we had nested into an existing transaction, we need
		 * to just drop the ref count on the handle.
		 *
		 * If old_ref == 0, the transaction is from reiserfs_get_block,
		 * and it was a persistent trans. Otherwise, it was nested above.
		 */
		if (th->t_refcount > old_ref) {
			if (old_ref)
				th->t_refcount--;
			else {
				int err;
				reiserfs_write_lock(inode->i_sb);
				err = reiserfs_end_persistent_transaction(th);
				reiserfs_write_unlock(inode->i_sb);
				if (err)
					ret = err;
			}
		}
	}
	return ret;

}
2684 | 2684 | ||
2685 | static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) | 2685 | static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) |
2686 | { | 2686 | { |
2687 | return generic_block_bmap(as, block, reiserfs_bmap); | 2687 | return generic_block_bmap(as, block, reiserfs_bmap); |
2688 | } | 2688 | } |
2689 | 2689 | ||
/*
 * reiserfs ->write_end: commit the @copied bytes at @pos that
 * reiserfs_write_begin prepared.  Handles short copies, logs i_size
 * growth inside its own nested transaction, closes any persistent
 * transaction left running by reiserfs_get_block, and trims blocks
 * allocated for a tail the copy never reached.  Returns the number of
 * bytes committed, or a negative journal error.
 */
static int reiserfs_write_end(struct file *file, struct address_space *mapping,
			      loff_t pos, unsigned len, unsigned copied,
			      struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	int ret = 0;
	int update_sd = 0;
	struct reiserfs_transaction_handle *th;
	unsigned start;
	int lock_depth = 0;	/* only meaningful while locked == true */
	bool locked = false;

	/* undo the pos bump write_begin made for AOP_FLAG_CONT_EXPAND */
	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
		pos ++;

	reiserfs_wait_on_write_block(inode->i_sb);
	if (reiserfs_transaction_running(inode->i_sb))
		th = current->journal_info;
	else
		th = NULL;

	start = pos & (PAGE_CACHE_SIZE - 1);
	if (unlikely(copied < len)) {
		if (!PageUptodate(page))
			copied = 0;

		/* zero the freshly-allocated buffers the copy didn't fill */
		page_zero_new_buffers(page, start + copied, start + len);
	}
	flush_dcache_page(page);

	reiserfs_commit_page(inode, page, start, start + copied);

	/* generic_commit_write does this for us, but does not update the
	** transaction tracking stuff when the size changes. So, we have
	** to do the i_size updates here.
	*/
	if (pos + copied > inode->i_size) {
		struct reiserfs_transaction_handle myth;
		lock_depth = reiserfs_write_lock_once(inode->i_sb);
		locked = true;
		/* If the file have grown beyond the border where it
		   can have a tail, unmark it as needing a tail
		   packing */
		if ((have_large_tails(inode->i_sb)
		     && inode->i_size > i_block_size(inode) * 4)
		    || (have_small_tails(inode->i_sb)
			&& inode->i_size > i_block_size(inode)))
			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;

		ret = journal_begin(&myth, inode->i_sb, 1);
		if (ret)
			goto journal_error;

		reiserfs_update_inode_transaction(inode);
		inode->i_size = pos + copied;
		/*
		 * this will just nest into our transaction. It's important
		 * to use mark_inode_dirty so the inode gets pushed around on the
		 * dirty lists, and so that O_SYNC works as expected
		 */
		mark_inode_dirty(inode);
		reiserfs_update_sd(&myth, inode);
		update_sd = 1;
		ret = journal_end(&myth, inode->i_sb, 1);
		if (ret)
			goto journal_error;
	}
	if (th) {
		if (!locked) {
			lock_depth = reiserfs_write_lock_once(inode->i_sb);
			locked = true;
		}
		if (!update_sd)
			mark_inode_dirty(inode);
		ret = reiserfs_end_persistent_transaction(th);
		if (ret)
			goto out;	/* NOTE(review): redundant - falls through to out anyway */
	}

      out:
	if (locked)
		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
	unlock_page(page);
	page_cache_release(page);

	/* a short copy may leave blocks allocated beyond i_size; trim them */
	if (pos + len > inode->i_size)
		reiserfs_truncate_failed_write(inode);

	return ret == 0 ? copied : ret;

      journal_error:
	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
	locked = false;
	if (th) {
		if (!update_sd)
			reiserfs_update_sd(th, inode);
		ret = reiserfs_end_persistent_transaction(th);
	}
	goto out;
}
2790 | 2790 | ||
2791 | int reiserfs_commit_write(struct file *f, struct page *page, | 2791 | int reiserfs_commit_write(struct file *f, struct page *page, |
2792 | unsigned from, unsigned to) | 2792 | unsigned from, unsigned to) |
2793 | { | 2793 | { |
2794 | struct inode *inode = page->mapping->host; | 2794 | struct inode *inode = page->mapping->host; |
2795 | loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; | 2795 | loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; |
2796 | int ret = 0; | 2796 | int ret = 0; |
2797 | int update_sd = 0; | 2797 | int update_sd = 0; |
2798 | struct reiserfs_transaction_handle *th = NULL; | 2798 | struct reiserfs_transaction_handle *th = NULL; |
2799 | 2799 | ||
2800 | reiserfs_write_unlock(inode->i_sb); | 2800 | reiserfs_write_unlock(inode->i_sb); |
2801 | reiserfs_wait_on_write_block(inode->i_sb); | 2801 | reiserfs_wait_on_write_block(inode->i_sb); |
2802 | reiserfs_write_lock(inode->i_sb); | 2802 | reiserfs_write_lock(inode->i_sb); |
2803 | 2803 | ||
2804 | if (reiserfs_transaction_running(inode->i_sb)) { | 2804 | if (reiserfs_transaction_running(inode->i_sb)) { |
2805 | th = current->journal_info; | 2805 | th = current->journal_info; |
2806 | } | 2806 | } |
2807 | reiserfs_commit_page(inode, page, from, to); | 2807 | reiserfs_commit_page(inode, page, from, to); |
2808 | 2808 | ||
2809 | /* generic_commit_write does this for us, but does not update the | 2809 | /* generic_commit_write does this for us, but does not update the |
2810 | ** transaction tracking stuff when the size changes. So, we have | 2810 | ** transaction tracking stuff when the size changes. So, we have |
2811 | ** to do the i_size updates here. | 2811 | ** to do the i_size updates here. |
2812 | */ | 2812 | */ |
2813 | if (pos > inode->i_size) { | 2813 | if (pos > inode->i_size) { |
2814 | struct reiserfs_transaction_handle myth; | 2814 | struct reiserfs_transaction_handle myth; |
2815 | /* If the file have grown beyond the border where it | 2815 | /* If the file have grown beyond the border where it |
2816 | can have a tail, unmark it as needing a tail | 2816 | can have a tail, unmark it as needing a tail |
2817 | packing */ | 2817 | packing */ |
2818 | if ((have_large_tails(inode->i_sb) | 2818 | if ((have_large_tails(inode->i_sb) |
2819 | && inode->i_size > i_block_size(inode) * 4) | 2819 | && inode->i_size > i_block_size(inode) * 4) |
2820 | || (have_small_tails(inode->i_sb) | 2820 | || (have_small_tails(inode->i_sb) |
2821 | && inode->i_size > i_block_size(inode))) | 2821 | && inode->i_size > i_block_size(inode))) |
2822 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 2822 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
2823 | 2823 | ||
2824 | ret = journal_begin(&myth, inode->i_sb, 1); | 2824 | ret = journal_begin(&myth, inode->i_sb, 1); |
2825 | if (ret) | 2825 | if (ret) |
2826 | goto journal_error; | 2826 | goto journal_error; |
2827 | 2827 | ||
2828 | reiserfs_update_inode_transaction(inode); | 2828 | reiserfs_update_inode_transaction(inode); |
2829 | inode->i_size = pos; | 2829 | inode->i_size = pos; |
2830 | /* | 2830 | /* |
2831 | * this will just nest into our transaction. It's important | 2831 | * this will just nest into our transaction. It's important |
2832 | * to use mark_inode_dirty so the inode gets pushed around on the | 2832 | * to use mark_inode_dirty so the inode gets pushed around on the |
2833 | * dirty lists, and so that O_SYNC works as expected | 2833 | * dirty lists, and so that O_SYNC works as expected |
2834 | */ | 2834 | */ |
2835 | mark_inode_dirty(inode); | 2835 | mark_inode_dirty(inode); |
2836 | reiserfs_update_sd(&myth, inode); | 2836 | reiserfs_update_sd(&myth, inode); |
2837 | update_sd = 1; | 2837 | update_sd = 1; |
2838 | ret = journal_end(&myth, inode->i_sb, 1); | 2838 | ret = journal_end(&myth, inode->i_sb, 1); |
2839 | if (ret) | 2839 | if (ret) |
2840 | goto journal_error; | 2840 | goto journal_error; |
2841 | } | 2841 | } |
2842 | if (th) { | 2842 | if (th) { |
2843 | if (!update_sd) | 2843 | if (!update_sd) |
2844 | mark_inode_dirty(inode); | 2844 | mark_inode_dirty(inode); |
2845 | ret = reiserfs_end_persistent_transaction(th); | 2845 | ret = reiserfs_end_persistent_transaction(th); |
2846 | if (ret) | 2846 | if (ret) |
2847 | goto out; | 2847 | goto out; |
2848 | } | 2848 | } |
2849 | 2849 | ||
2850 | out: | 2850 | out: |
2851 | return ret; | 2851 | return ret; |
2852 | 2852 | ||
2853 | journal_error: | 2853 | journal_error: |
2854 | if (th) { | 2854 | if (th) { |
2855 | if (!update_sd) | 2855 | if (!update_sd) |
2856 | reiserfs_update_sd(th, inode); | 2856 | reiserfs_update_sd(th, inode); |
2857 | ret = reiserfs_end_persistent_transaction(th); | 2857 | ret = reiserfs_end_persistent_transaction(th); |
2858 | } | 2858 | } |
2859 | 2859 | ||
2860 | return ret; | 2860 | return ret; |
2861 | } | 2861 | } |
2862 | 2862 | ||
2863 | void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) | 2863 | void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) |
2864 | { | 2864 | { |
2865 | if (reiserfs_attrs(inode->i_sb)) { | 2865 | if (reiserfs_attrs(inode->i_sb)) { |
2866 | if (sd_attrs & REISERFS_SYNC_FL) | 2866 | if (sd_attrs & REISERFS_SYNC_FL) |
2867 | inode->i_flags |= S_SYNC; | 2867 | inode->i_flags |= S_SYNC; |
2868 | else | 2868 | else |
2869 | inode->i_flags &= ~S_SYNC; | 2869 | inode->i_flags &= ~S_SYNC; |
2870 | if (sd_attrs & REISERFS_IMMUTABLE_FL) | 2870 | if (sd_attrs & REISERFS_IMMUTABLE_FL) |
2871 | inode->i_flags |= S_IMMUTABLE; | 2871 | inode->i_flags |= S_IMMUTABLE; |
2872 | else | 2872 | else |
2873 | inode->i_flags &= ~S_IMMUTABLE; | 2873 | inode->i_flags &= ~S_IMMUTABLE; |
2874 | if (sd_attrs & REISERFS_APPEND_FL) | 2874 | if (sd_attrs & REISERFS_APPEND_FL) |
2875 | inode->i_flags |= S_APPEND; | 2875 | inode->i_flags |= S_APPEND; |
2876 | else | 2876 | else |
2877 | inode->i_flags &= ~S_APPEND; | 2877 | inode->i_flags &= ~S_APPEND; |
2878 | if (sd_attrs & REISERFS_NOATIME_FL) | 2878 | if (sd_attrs & REISERFS_NOATIME_FL) |
2879 | inode->i_flags |= S_NOATIME; | 2879 | inode->i_flags |= S_NOATIME; |
2880 | else | 2880 | else |
2881 | inode->i_flags &= ~S_NOATIME; | 2881 | inode->i_flags &= ~S_NOATIME; |
2882 | if (sd_attrs & REISERFS_NOTAIL_FL) | 2882 | if (sd_attrs & REISERFS_NOTAIL_FL) |
2883 | REISERFS_I(inode)->i_flags |= i_nopack_mask; | 2883 | REISERFS_I(inode)->i_flags |= i_nopack_mask; |
2884 | else | 2884 | else |
2885 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; | 2885 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; |
2886 | } | 2886 | } |
2887 | } | 2887 | } |
2888 | 2888 | ||
2889 | void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) | 2889 | void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) |
2890 | { | 2890 | { |
2891 | if (reiserfs_attrs(inode->i_sb)) { | 2891 | if (reiserfs_attrs(inode->i_sb)) { |
2892 | if (inode->i_flags & S_IMMUTABLE) | 2892 | if (inode->i_flags & S_IMMUTABLE) |
2893 | *sd_attrs |= REISERFS_IMMUTABLE_FL; | 2893 | *sd_attrs |= REISERFS_IMMUTABLE_FL; |
2894 | else | 2894 | else |
2895 | *sd_attrs &= ~REISERFS_IMMUTABLE_FL; | 2895 | *sd_attrs &= ~REISERFS_IMMUTABLE_FL; |
2896 | if (inode->i_flags & S_SYNC) | 2896 | if (inode->i_flags & S_SYNC) |
2897 | *sd_attrs |= REISERFS_SYNC_FL; | 2897 | *sd_attrs |= REISERFS_SYNC_FL; |
2898 | else | 2898 | else |
2899 | *sd_attrs &= ~REISERFS_SYNC_FL; | 2899 | *sd_attrs &= ~REISERFS_SYNC_FL; |
2900 | if (inode->i_flags & S_NOATIME) | 2900 | if (inode->i_flags & S_NOATIME) |
2901 | *sd_attrs |= REISERFS_NOATIME_FL; | 2901 | *sd_attrs |= REISERFS_NOATIME_FL; |
2902 | else | 2902 | else |
2903 | *sd_attrs &= ~REISERFS_NOATIME_FL; | 2903 | *sd_attrs &= ~REISERFS_NOATIME_FL; |
2904 | if (REISERFS_I(inode)->i_flags & i_nopack_mask) | 2904 | if (REISERFS_I(inode)->i_flags & i_nopack_mask) |
2905 | *sd_attrs |= REISERFS_NOTAIL_FL; | 2905 | *sd_attrs |= REISERFS_NOTAIL_FL; |
2906 | else | 2906 | else |
2907 | *sd_attrs &= ~REISERFS_NOTAIL_FL; | 2907 | *sd_attrs &= ~REISERFS_NOTAIL_FL; |
2908 | } | 2908 | } |
2909 | } | 2909 | } |
2910 | 2910 | ||
/* decide if this buffer needs to stay around for data logging or ordered
** write purposes: returns 1 if invalidatepage may drop the buffer,
** 0 if it must stay pinned for the journal.
*/
static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
{
	int ret = 1;
	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);

	lock_buffer(bh);
	spin_lock(&j->j_dirty_buffers_lock);
	if (!buffer_mapped(bh)) {
		/* unmapped buffers carry no journal state worth keeping */
		goto free_jh;
	}
	/* the page is locked, and the only places that log a data buffer
	 * also lock the page.
	 */
	if (reiserfs_file_data_log(inode)) {
		/*
		 * very conservative, leave the buffer pinned if
		 * anyone might need it.
		 */
		if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
			ret = 0;
		}
	} else if (buffer_dirty(bh)) {
		struct reiserfs_journal_list *jl;
		struct reiserfs_jh *jh = bh->b_private;

		/* why is this safe?
		 * reiserfs_setattr updates i_size in the on disk
		 * stat data before allowing vmtruncate to be called.
		 *
		 * If buffer was put onto the ordered list for this
		 * transaction, we know for sure either this transaction
		 * or an older one already has updated i_size on disk,
		 * and this ordered data won't be referenced in the file
		 * if we crash.
		 *
		 * if the buffer was put onto the ordered list for an older
		 * transaction, we need to leave it around
		 */
		if (jh && (jl = jh->jl)
		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
			ret = 0;
	}
free_jh:
	/* dropping the buffer: tear down its journal head too */
	if (ret && bh->b_private) {
		reiserfs_free_jh(bh);
	}
	spin_unlock(&j->j_dirty_buffers_lock);
	unlock_buffer(bh);
	return ret;
}
2964 | 2964 | ||
2965 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ | 2965 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ |
2966 | static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | 2966 | static void reiserfs_invalidatepage(struct page *page, unsigned long offset) |
2967 | { | 2967 | { |
2968 | struct buffer_head *head, *bh, *next; | 2968 | struct buffer_head *head, *bh, *next; |
2969 | struct inode *inode = page->mapping->host; | 2969 | struct inode *inode = page->mapping->host; |
2970 | unsigned int curr_off = 0; | 2970 | unsigned int curr_off = 0; |
2971 | int ret = 1; | 2971 | int ret = 1; |
2972 | 2972 | ||
2973 | BUG_ON(!PageLocked(page)); | 2973 | BUG_ON(!PageLocked(page)); |
2974 | 2974 | ||
2975 | if (offset == 0) | 2975 | if (offset == 0) |
2976 | ClearPageChecked(page); | 2976 | ClearPageChecked(page); |
2977 | 2977 | ||
2978 | if (!page_has_buffers(page)) | 2978 | if (!page_has_buffers(page)) |
2979 | goto out; | 2979 | goto out; |
2980 | 2980 | ||
2981 | head = page_buffers(page); | 2981 | head = page_buffers(page); |
2982 | bh = head; | 2982 | bh = head; |
2983 | do { | 2983 | do { |
2984 | unsigned int next_off = curr_off + bh->b_size; | 2984 | unsigned int next_off = curr_off + bh->b_size; |
2985 | next = bh->b_this_page; | 2985 | next = bh->b_this_page; |
2986 | 2986 | ||
2987 | /* | 2987 | /* |
2988 | * is this block fully invalidated? | 2988 | * is this block fully invalidated? |
2989 | */ | 2989 | */ |
2990 | if (offset <= curr_off) { | 2990 | if (offset <= curr_off) { |
2991 | if (invalidatepage_can_drop(inode, bh)) | 2991 | if (invalidatepage_can_drop(inode, bh)) |
2992 | reiserfs_unmap_buffer(bh); | 2992 | reiserfs_unmap_buffer(bh); |
2993 | else | 2993 | else |
2994 | ret = 0; | 2994 | ret = 0; |
2995 | } | 2995 | } |
2996 | curr_off = next_off; | 2996 | curr_off = next_off; |
2997 | bh = next; | 2997 | bh = next; |
2998 | } while (bh != head); | 2998 | } while (bh != head); |
2999 | 2999 | ||
3000 | /* | 3000 | /* |
3001 | * We release buffers only if the entire page is being invalidated. | 3001 | * We release buffers only if the entire page is being invalidated. |
3002 | * The get_block cached value has been unconditionally invalidated, | 3002 | * The get_block cached value has been unconditionally invalidated, |
3003 | * so real IO is not possible anymore. | 3003 | * so real IO is not possible anymore. |
3004 | */ | 3004 | */ |
3005 | if (!offset && ret) { | 3005 | if (!offset && ret) { |
3006 | ret = try_to_release_page(page, 0); | 3006 | ret = try_to_release_page(page, 0); |
3007 | /* maybe should BUG_ON(!ret); - neilb */ | 3007 | /* maybe should BUG_ON(!ret); - neilb */ |
3008 | } | 3008 | } |
3009 | out: | 3009 | out: |
3010 | return; | 3010 | return; |
3011 | } | 3011 | } |
3012 | 3012 | ||
3013 | static int reiserfs_set_page_dirty(struct page *page) | 3013 | static int reiserfs_set_page_dirty(struct page *page) |
3014 | { | 3014 | { |
3015 | struct inode *inode = page->mapping->host; | 3015 | struct inode *inode = page->mapping->host; |
3016 | if (reiserfs_file_data_log(inode)) { | 3016 | if (reiserfs_file_data_log(inode)) { |
3017 | SetPageChecked(page); | 3017 | SetPageChecked(page); |
3018 | return __set_page_dirty_nobuffers(page); | 3018 | return __set_page_dirty_nobuffers(page); |
3019 | } | 3019 | } |
3020 | return __set_page_dirty_buffers(page); | 3020 | return __set_page_dirty_buffers(page); |
3021 | } | 3021 | } |
3022 | 3022 | ||
3023 | /* | 3023 | /* |
3024 | * Returns 1 if the page's buffers were dropped. The page is locked. | 3024 | * Returns 1 if the page's buffers were dropped. The page is locked. |
3025 | * | 3025 | * |
3026 | * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads | 3026 | * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads |
3027 | * in the buffers at page_buffers(page). | 3027 | * in the buffers at page_buffers(page). |
3028 | * | 3028 | * |
3029 | * even in -o notail mode, we can't be sure an old mount without -o notail | 3029 | * even in -o notail mode, we can't be sure an old mount without -o notail |
3030 | * didn't create files with tails. | 3030 | * didn't create files with tails. |
3031 | */ | 3031 | */ |
3032 | static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) | 3032 | static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) |
3033 | { | 3033 | { |
3034 | struct inode *inode = page->mapping->host; | 3034 | struct inode *inode = page->mapping->host; |
3035 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); | 3035 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); |
3036 | struct buffer_head *head; | 3036 | struct buffer_head *head; |
3037 | struct buffer_head *bh; | 3037 | struct buffer_head *bh; |
3038 | int ret = 1; | 3038 | int ret = 1; |
3039 | 3039 | ||
3040 | WARN_ON(PageChecked(page)); | 3040 | WARN_ON(PageChecked(page)); |
3041 | spin_lock(&j->j_dirty_buffers_lock); | 3041 | spin_lock(&j->j_dirty_buffers_lock); |
3042 | head = page_buffers(page); | 3042 | head = page_buffers(page); |
3043 | bh = head; | 3043 | bh = head; |
3044 | do { | 3044 | do { |
3045 | if (bh->b_private) { | 3045 | if (bh->b_private) { |
3046 | if (!buffer_dirty(bh) && !buffer_locked(bh)) { | 3046 | if (!buffer_dirty(bh) && !buffer_locked(bh)) { |
3047 | reiserfs_free_jh(bh); | 3047 | reiserfs_free_jh(bh); |
3048 | } else { | 3048 | } else { |
3049 | ret = 0; | 3049 | ret = 0; |
3050 | break; | 3050 | break; |
3051 | } | 3051 | } |
3052 | } | 3052 | } |
3053 | bh = bh->b_this_page; | 3053 | bh = bh->b_this_page; |
3054 | } while (bh != head); | 3054 | } while (bh != head); |
3055 | if (ret) | 3055 | if (ret) |
3056 | ret = try_to_free_buffers(page); | 3056 | ret = try_to_free_buffers(page); |
3057 | spin_unlock(&j->j_dirty_buffers_lock); | 3057 | spin_unlock(&j->j_dirty_buffers_lock); |
3058 | return ret; | 3058 | return ret; |
3059 | } | 3059 | } |
3060 | 3060 | ||
3061 | /* We thank Mingming Cao for helping us understand in great detail what | 3061 | /* We thank Mingming Cao for helping us understand in great detail what |
3062 | to do in this section of the code. */ | 3062 | to do in this section of the code. */ |
3063 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | 3063 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, |
3064 | const struct iovec *iov, loff_t offset, | 3064 | const struct iovec *iov, loff_t offset, |
3065 | unsigned long nr_segs) | 3065 | unsigned long nr_segs) |
3066 | { | 3066 | { |
3067 | struct file *file = iocb->ki_filp; | 3067 | struct file *file = iocb->ki_filp; |
3068 | struct inode *inode = file->f_mapping->host; | 3068 | struct inode *inode = file->f_mapping->host; |
3069 | ssize_t ret; | 3069 | ssize_t ret; |
3070 | 3070 | ||
3071 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 3071 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
3072 | offset, nr_segs, | 3072 | offset, nr_segs, |
3073 | reiserfs_get_blocks_direct_io, NULL); | 3073 | reiserfs_get_blocks_direct_io, NULL); |
3074 | 3074 | ||
3075 | /* | 3075 | /* |
3076 | * In case of error extending write may have instantiated a few | 3076 | * In case of error extending write may have instantiated a few |
3077 | * blocks outside i_size. Trim these off again. | 3077 | * blocks outside i_size. Trim these off again. |
3078 | */ | 3078 | */ |
3079 | if (unlikely((rw & WRITE) && ret < 0)) { | 3079 | if (unlikely((rw & WRITE) && ret < 0)) { |
3080 | loff_t isize = i_size_read(inode); | 3080 | loff_t isize = i_size_read(inode); |
3081 | loff_t end = offset + iov_length(iov, nr_segs); | 3081 | loff_t end = offset + iov_length(iov, nr_segs); |
3082 | 3082 | ||
3083 | if (end > isize) | 3083 | if (end > isize) |
3084 | vmtruncate(inode, isize); | 3084 | vmtruncate(inode, isize); |
3085 | } | 3085 | } |
3086 | 3086 | ||
3087 | return ret; | 3087 | return ret; |
3088 | } | 3088 | } |
3089 | 3089 | ||
3090 | int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | 3090 | int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) |
3091 | { | 3091 | { |
3092 | struct inode *inode = dentry->d_inode; | 3092 | struct inode *inode = dentry->d_inode; |
3093 | unsigned int ia_valid; | 3093 | unsigned int ia_valid; |
3094 | int depth; | 3094 | int depth; |
3095 | int error; | 3095 | int error; |
3096 | 3096 | ||
3097 | error = inode_change_ok(inode, attr); | 3097 | error = inode_change_ok(inode, attr); |
3098 | if (error) | 3098 | if (error) |
3099 | return error; | 3099 | return error; |
3100 | 3100 | ||
3101 | /* must be turned off for recursive notify_change calls */ | 3101 | /* must be turned off for recursive notify_change calls */ |
3102 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); | 3102 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); |
3103 | 3103 | ||
3104 | depth = reiserfs_write_lock_once(inode->i_sb); | 3104 | depth = reiserfs_write_lock_once(inode->i_sb); |
3105 | if (is_quota_modification(inode, attr)) | 3105 | if (is_quota_modification(inode, attr)) |
3106 | dquot_initialize(inode); | 3106 | dquot_initialize(inode); |
3107 | 3107 | ||
3108 | if (attr->ia_valid & ATTR_SIZE) { | 3108 | if (attr->ia_valid & ATTR_SIZE) { |
3109 | /* version 2 items will be caught by the s_maxbytes check | 3109 | /* version 2 items will be caught by the s_maxbytes check |
3110 | ** done for us in vmtruncate | 3110 | ** done for us in vmtruncate |
3111 | */ | 3111 | */ |
3112 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && | 3112 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && |
3113 | attr->ia_size > MAX_NON_LFS) { | 3113 | attr->ia_size > MAX_NON_LFS) { |
3114 | error = -EFBIG; | 3114 | error = -EFBIG; |
3115 | goto out; | 3115 | goto out; |
3116 | } | 3116 | } |
3117 | |||
3118 | inode_dio_wait(inode); | ||
3119 | |||
3117 | /* fill in hole pointers in the expanding truncate case. */ | 3120 | /* fill in hole pointers in the expanding truncate case. */ |
3118 | if (attr->ia_size > inode->i_size) { | 3121 | if (attr->ia_size > inode->i_size) { |
3119 | error = generic_cont_expand_simple(inode, attr->ia_size); | 3122 | error = generic_cont_expand_simple(inode, attr->ia_size); |
3120 | if (REISERFS_I(inode)->i_prealloc_count > 0) { | 3123 | if (REISERFS_I(inode)->i_prealloc_count > 0) { |
3121 | int err; | 3124 | int err; |
3122 | struct reiserfs_transaction_handle th; | 3125 | struct reiserfs_transaction_handle th; |
3123 | /* we're changing at most 2 bitmaps, inode + super */ | 3126 | /* we're changing at most 2 bitmaps, inode + super */ |
3124 | err = journal_begin(&th, inode->i_sb, 4); | 3127 | err = journal_begin(&th, inode->i_sb, 4); |
3125 | if (!err) { | 3128 | if (!err) { |
3126 | reiserfs_discard_prealloc(&th, inode); | 3129 | reiserfs_discard_prealloc(&th, inode); |
3127 | err = journal_end(&th, inode->i_sb, 4); | 3130 | err = journal_end(&th, inode->i_sb, 4); |
3128 | } | 3131 | } |
3129 | if (err) | 3132 | if (err) |
3130 | error = err; | 3133 | error = err; |
3131 | } | 3134 | } |
3132 | if (error) | 3135 | if (error) |
3133 | goto out; | 3136 | goto out; |
3134 | /* | 3137 | /* |
3135 | * file size is changed, ctime and mtime are | 3138 | * file size is changed, ctime and mtime are |
3136 | * to be updated | 3139 | * to be updated |
3137 | */ | 3140 | */ |
3138 | attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); | 3141 | attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); |
3139 | } | 3142 | } |
3140 | } | 3143 | } |
3141 | 3144 | ||
3142 | if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || | 3145 | if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || |
3143 | ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && | 3146 | ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && |
3144 | (get_inode_sd_version(inode) == STAT_DATA_V1)) { | 3147 | (get_inode_sd_version(inode) == STAT_DATA_V1)) { |
3145 | /* stat data of format v3.5 has 16 bit uid and gid */ | 3148 | /* stat data of format v3.5 has 16 bit uid and gid */ |
3146 | error = -EINVAL; | 3149 | error = -EINVAL; |
3147 | goto out; | 3150 | goto out; |
3148 | } | 3151 | } |
3149 | 3152 | ||
3150 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 3153 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
3151 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 3154 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
3152 | struct reiserfs_transaction_handle th; | 3155 | struct reiserfs_transaction_handle th; |
3153 | int jbegin_count = | 3156 | int jbegin_count = |
3154 | 2 * | 3157 | 2 * |
3155 | (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + | 3158 | (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + |
3156 | REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + | 3159 | REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + |
3157 | 2; | 3160 | 2; |
3158 | 3161 | ||
3159 | error = reiserfs_chown_xattrs(inode, attr); | 3162 | error = reiserfs_chown_xattrs(inode, attr); |
3160 | 3163 | ||
3161 | if (error) | 3164 | if (error) |
3162 | return error; | 3165 | return error; |
3163 | 3166 | ||
3164 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ | 3167 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ |
3165 | error = journal_begin(&th, inode->i_sb, jbegin_count); | 3168 | error = journal_begin(&th, inode->i_sb, jbegin_count); |
3166 | if (error) | 3169 | if (error) |
3167 | goto out; | 3170 | goto out; |
3168 | error = dquot_transfer(inode, attr); | 3171 | error = dquot_transfer(inode, attr); |
3169 | if (error) { | 3172 | if (error) { |
3170 | journal_end(&th, inode->i_sb, jbegin_count); | 3173 | journal_end(&th, inode->i_sb, jbegin_count); |
3171 | goto out; | 3174 | goto out; |
3172 | } | 3175 | } |
3173 | 3176 | ||
3174 | /* Update corresponding info in inode so that everything is in | 3177 | /* Update corresponding info in inode so that everything is in |
3175 | * one transaction */ | 3178 | * one transaction */ |
3176 | if (attr->ia_valid & ATTR_UID) | 3179 | if (attr->ia_valid & ATTR_UID) |
3177 | inode->i_uid = attr->ia_uid; | 3180 | inode->i_uid = attr->ia_uid; |
3178 | if (attr->ia_valid & ATTR_GID) | 3181 | if (attr->ia_valid & ATTR_GID) |
3179 | inode->i_gid = attr->ia_gid; | 3182 | inode->i_gid = attr->ia_gid; |
3180 | mark_inode_dirty(inode); | 3183 | mark_inode_dirty(inode); |
3181 | error = journal_end(&th, inode->i_sb, jbegin_count); | 3184 | error = journal_end(&th, inode->i_sb, jbegin_count); |
3182 | if (error) | 3185 | if (error) |
3183 | goto out; | 3186 | goto out; |
3184 | } | 3187 | } |
3185 | 3188 | ||
3186 | /* | 3189 | /* |
3187 | * Relax the lock here, as it might truncate the | 3190 | * Relax the lock here, as it might truncate the |
3188 | * inode pages and wait for inode pages locks. | 3191 | * inode pages and wait for inode pages locks. |
3189 | * To release such page lock, the owner needs the | 3192 | * To release such page lock, the owner needs the |
3190 | * reiserfs lock | 3193 | * reiserfs lock |
3191 | */ | 3194 | */ |
3192 | reiserfs_write_unlock_once(inode->i_sb, depth); | 3195 | reiserfs_write_unlock_once(inode->i_sb, depth); |
3193 | if ((attr->ia_valid & ATTR_SIZE) && | 3196 | if ((attr->ia_valid & ATTR_SIZE) && |
3194 | attr->ia_size != i_size_read(inode)) | 3197 | attr->ia_size != i_size_read(inode)) |
3195 | error = vmtruncate(inode, attr->ia_size); | 3198 | error = vmtruncate(inode, attr->ia_size); |
3196 | 3199 | ||
3197 | if (!error) { | 3200 | if (!error) { |
3198 | setattr_copy(inode, attr); | 3201 | setattr_copy(inode, attr); |
3199 | mark_inode_dirty(inode); | 3202 | mark_inode_dirty(inode); |
3200 | } | 3203 | } |
3201 | depth = reiserfs_write_lock_once(inode->i_sb); | 3204 | depth = reiserfs_write_lock_once(inode->i_sb); |
3202 | 3205 | ||
3203 | if (!error && reiserfs_posixacl(inode->i_sb)) { | 3206 | if (!error && reiserfs_posixacl(inode->i_sb)) { |
3204 | if (attr->ia_valid & ATTR_MODE) | 3207 | if (attr->ia_valid & ATTR_MODE) |
3205 | error = reiserfs_acl_chmod(inode); | 3208 | error = reiserfs_acl_chmod(inode); |
3206 | } | 3209 | } |
3207 | 3210 | ||
3208 | out: | 3211 | out: |
3209 | reiserfs_write_unlock_once(inode->i_sb, depth); | 3212 | reiserfs_write_unlock_once(inode->i_sb, depth); |
3210 | 3213 | ||
3211 | return error; | 3214 | return error; |
3212 | } | 3215 | } |
3213 | 3216 | ||
/*
 * Address space operations for reiserfs file data pages; wires the
 * page-cache callbacks above into the VFS.
 */
const struct address_space_operations reiserfs_address_space_operations = {
	.writepage = reiserfs_writepage,
	.readpage = reiserfs_readpage,
	.readpages = reiserfs_readpages,
	.releasepage = reiserfs_releasepage,
	.invalidatepage = reiserfs_invalidatepage,
	.write_begin = reiserfs_write_begin,
	.write_end = reiserfs_write_end,
	.bmap = reiserfs_aop_bmap,
	.direct_IO = reiserfs_direct_IO,
	.set_page_dirty = reiserfs_set_page_dirty,
};
3226 | 3229 |