Commit 89e107877b65bf6eff1d63a1302dee9a091586f5
Committed by
Linus Torvalds
1 parent
7765ec26ae
fs: new cont helpers
Rework the generic block "cont" routines to handle the new aops. Supporting cont_prepare_write would take quite a lot of code, so remove it instead (and we later convert all filesystems to use the new helpers). write_begin gets passed AOP_FLAG_CONT_EXPAND when called from generic_cont_expand, so filesystems can avoid the old hacks they used. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 4 changed files with 103 additions and 102 deletions Side-by-side Diff
fs/buffer.c
... | ... | @@ -2156,14 +2156,14 @@ |
2156 | 2156 | } |
2157 | 2157 | |
2158 | 2158 | /* utility function for filesystems that need to do work on expanding |
2159 | - * truncates. Uses prepare/commit_write to allow the filesystem to | |
2159 | + * truncates. Uses filesystem pagecache writes to allow the filesystem to | |
2160 | 2160 | * deal with the hole. |
2161 | 2161 | */ |
2162 | -static int __generic_cont_expand(struct inode *inode, loff_t size, | |
2163 | - pgoff_t index, unsigned int offset) | |
2162 | +int generic_cont_expand_simple(struct inode *inode, loff_t size) | |
2164 | 2163 | { |
2165 | 2164 | struct address_space *mapping = inode->i_mapping; |
2166 | 2165 | struct page *page; |
2166 | + void *fsdata; | |
2167 | 2167 | unsigned long limit; |
2168 | 2168 | int err; |
2169 | 2169 | |
2170 | 2170 | |
2171 | 2171 | |
2172 | 2172 | |
2173 | 2173 | |
2174 | 2174 | |
2175 | 2175 | |
2176 | 2176 | |
2177 | 2177 | |
2178 | 2178 | |
2179 | 2179 | |
2180 | 2180 | |
2181 | 2181 | |
2182 | 2182 | |
2183 | 2183 | |
2184 | 2184 | |
2185 | 2185 | |
2186 | 2186 | |
2187 | 2187 | |
2188 | 2188 | |
2189 | 2189 | |
2190 | 2190 | |
... | ... | @@ -2176,140 +2176,134 @@ |
2176 | 2176 | if (size > inode->i_sb->s_maxbytes) |
2177 | 2177 | goto out; |
2178 | 2178 | |
2179 | - err = -ENOMEM; | |
2180 | - page = grab_cache_page(mapping, index); | |
2181 | - if (!page) | |
2179 | + err = pagecache_write_begin(NULL, mapping, size, 0, | |
2180 | + AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND, | |
2181 | + &page, &fsdata); | |
2182 | + if (err) | |
2182 | 2183 | goto out; |
2183 | - err = mapping->a_ops->prepare_write(NULL, page, offset, offset); | |
2184 | - if (err) { | |
2185 | - /* | |
2186 | - * ->prepare_write() may have instantiated a few blocks | |
2187 | - * outside i_size. Trim these off again. | |
2188 | - */ | |
2189 | - unlock_page(page); | |
2190 | - page_cache_release(page); | |
2191 | - vmtruncate(inode, inode->i_size); | |
2192 | - goto out; | |
2193 | - } | |
2194 | 2184 | |
2195 | - err = mapping->a_ops->commit_write(NULL, page, offset, offset); | |
2185 | + err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); | |
2186 | + BUG_ON(err > 0); | |
2196 | 2187 | |
2197 | - unlock_page(page); | |
2198 | - page_cache_release(page); | |
2199 | - if (err > 0) | |
2200 | - err = 0; | |
2201 | 2188 | out: |
2202 | 2189 | return err; |
2203 | 2190 | } |
2204 | 2191 | |
2205 | 2192 | int generic_cont_expand(struct inode *inode, loff_t size) |
2206 | 2193 | { |
2207 | - pgoff_t index; | |
2208 | 2194 | unsigned int offset; |
2209 | 2195 | |
2210 | 2196 | offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ |
2211 | 2197 | |
2212 | 2198 | /* ugh. in prepare/commit_write, if from==to==start of block, we |
2213 | - ** skip the prepare. make sure we never send an offset for the start | |
2214 | - ** of a block | |
2215 | - */ | |
2199 | + * skip the prepare. make sure we never send an offset for the start | |
2200 | + * of a block. | |
2201 | + * XXX: actually, this should be handled in those filesystems by | |
2202 | + * checking for the AOP_FLAG_CONT_EXPAND flag. | |
2203 | + */ | |
2216 | 2204 | if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { |
2217 | 2205 | /* caller must handle this extra byte. */ |
2218 | - offset++; | |
2206 | + size++; | |
2219 | 2207 | } |
2220 | - index = size >> PAGE_CACHE_SHIFT; | |
2221 | - | |
2222 | - return __generic_cont_expand(inode, size, index, offset); | |
2208 | + return generic_cont_expand_simple(inode, size); | |
2223 | 2209 | } |
2224 | 2210 | |
2225 | -int generic_cont_expand_simple(struct inode *inode, loff_t size) | |
2211 | +int cont_expand_zero(struct file *file, struct address_space *mapping, | |
2212 | + loff_t pos, loff_t *bytes) | |
2226 | 2213 | { |
2227 | - loff_t pos = size - 1; | |
2228 | - pgoff_t index = pos >> PAGE_CACHE_SHIFT; | |
2229 | - unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; | |
2230 | - | |
2231 | - /* prepare/commit_write can handle even if from==to==start of block. */ | |
2232 | - return __generic_cont_expand(inode, size, index, offset); | |
2233 | -} | |
2234 | - | |
2235 | -/* | |
2236 | - * For moronic filesystems that do not allow holes in file. | |
2237 | - * We may have to extend the file. | |
2238 | - */ | |
2239 | - | |
2240 | -int cont_prepare_write(struct page *page, unsigned offset, | |
2241 | - unsigned to, get_block_t *get_block, loff_t *bytes) | |
2242 | -{ | |
2243 | - struct address_space *mapping = page->mapping; | |
2244 | 2214 | struct inode *inode = mapping->host; |
2245 | - struct page *new_page; | |
2246 | - pgoff_t pgpos; | |
2247 | - long status; | |
2248 | - unsigned zerofrom; | |
2249 | 2215 | unsigned blocksize = 1 << inode->i_blkbits; |
2216 | + struct page *page; | |
2217 | + void *fsdata; | |
2218 | + pgoff_t index, curidx; | |
2219 | + loff_t curpos; | |
2220 | + unsigned zerofrom, offset, len; | |
2221 | + int err = 0; | |
2250 | 2222 | |
2251 | - while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { | |
2252 | - status = -ENOMEM; | |
2253 | - new_page = grab_cache_page(mapping, pgpos); | |
2254 | - if (!new_page) | |
2255 | - goto out; | |
2256 | - /* we might sleep */ | |
2257 | - if (*bytes>>PAGE_CACHE_SHIFT != pgpos) { | |
2258 | - unlock_page(new_page); | |
2259 | - page_cache_release(new_page); | |
2260 | - continue; | |
2261 | - } | |
2262 | - zerofrom = *bytes & ~PAGE_CACHE_MASK; | |
2223 | + index = pos >> PAGE_CACHE_SHIFT; | |
2224 | + offset = pos & ~PAGE_CACHE_MASK; | |
2225 | + | |
2226 | + while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) { | |
2227 | + zerofrom = curpos & ~PAGE_CACHE_MASK; | |
2263 | 2228 | if (zerofrom & (blocksize-1)) { |
2264 | 2229 | *bytes |= (blocksize-1); |
2265 | 2230 | (*bytes)++; |
2266 | 2231 | } |
2267 | - status = __block_prepare_write(inode, new_page, zerofrom, | |
2268 | - PAGE_CACHE_SIZE, get_block); | |
2269 | - if (status) | |
2270 | - goto out_unmap; | |
2271 | - zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom, | |
2272 | - KM_USER0); | |
2273 | - generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); | |
2274 | - unlock_page(new_page); | |
2275 | - page_cache_release(new_page); | |
2232 | + len = PAGE_CACHE_SIZE - zerofrom; | |
2233 | + | |
2234 | + err = pagecache_write_begin(file, mapping, curpos, len, | |
2235 | + AOP_FLAG_UNINTERRUPTIBLE, | |
2236 | + &page, &fsdata); | |
2237 | + if (err) | |
2238 | + goto out; | |
2239 | + zero_user_page(page, zerofrom, len, KM_USER0); | |
2240 | + err = pagecache_write_end(file, mapping, curpos, len, len, | |
2241 | + page, fsdata); | |
2242 | + if (err < 0) | |
2243 | + goto out; | |
2244 | + BUG_ON(err != len); | |
2245 | + err = 0; | |
2276 | 2246 | } |
2277 | 2247 | |
2278 | - if (page->index < pgpos) { | |
2279 | - /* completely inside the area */ | |
2280 | - zerofrom = offset; | |
2281 | - } else { | |
2282 | - /* page covers the boundary, find the boundary offset */ | |
2283 | - zerofrom = *bytes & ~PAGE_CACHE_MASK; | |
2284 | - | |
2248 | + /* page covers the boundary, find the boundary offset */ | |
2249 | + if (index == curidx) { | |
2250 | + zerofrom = curpos & ~PAGE_CACHE_MASK; | |
2285 | 2251 | /* if we will expand the thing last block will be filled */ |
2286 | - if (to > zerofrom && (zerofrom & (blocksize-1))) { | |
2252 | + if (offset <= zerofrom) { | |
2253 | + goto out; | |
2254 | + } | |
2255 | + if (zerofrom & (blocksize-1)) { | |
2287 | 2256 | *bytes |= (blocksize-1); |
2288 | 2257 | (*bytes)++; |
2289 | 2258 | } |
2259 | + len = offset - zerofrom; | |
2290 | 2260 | |
2291 | - /* starting below the boundary? Nothing to zero out */ | |
2292 | - if (offset <= zerofrom) | |
2293 | - zerofrom = offset; | |
2261 | + err = pagecache_write_begin(file, mapping, curpos, len, | |
2262 | + AOP_FLAG_UNINTERRUPTIBLE, | |
2263 | + &page, &fsdata); | |
2264 | + if (err) | |
2265 | + goto out; | |
2266 | + zero_user_page(page, zerofrom, len, KM_USER0); | |
2267 | + err = pagecache_write_end(file, mapping, curpos, len, len, | |
2268 | + page, fsdata); | |
2269 | + if (err < 0) | |
2270 | + goto out; | |
2271 | + BUG_ON(err != len); | |
2272 | + err = 0; | |
2294 | 2273 | } |
2295 | - status = __block_prepare_write(inode, page, zerofrom, to, get_block); | |
2296 | - if (status) | |
2297 | - goto out1; | |
2298 | - if (zerofrom < offset) { | |
2299 | - zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0); | |
2300 | - __block_commit_write(inode, page, zerofrom, offset); | |
2274 | +out: | |
2275 | + return err; | |
2276 | +} | |
2277 | + | |
2278 | +/* | |
2279 | + * For moronic filesystems that do not allow holes in file. | |
2280 | + * We may have to extend the file. | |
2281 | + */ | |
2282 | +int cont_write_begin(struct file *file, struct address_space *mapping, | |
2283 | + loff_t pos, unsigned len, unsigned flags, | |
2284 | + struct page **pagep, void **fsdata, | |
2285 | + get_block_t *get_block, loff_t *bytes) | |
2286 | +{ | |
2287 | + struct inode *inode = mapping->host; | |
2288 | + unsigned blocksize = 1 << inode->i_blkbits; | |
2289 | + unsigned zerofrom; | |
2290 | + int err; | |
2291 | + | |
2292 | + err = cont_expand_zero(file, mapping, pos, bytes); | |
2293 | + if (err) | |
2294 | + goto out; | |
2295 | + | |
2296 | + zerofrom = *bytes & ~PAGE_CACHE_MASK; | |
2297 | + if (pos+len > *bytes && zerofrom & (blocksize-1)) { | |
2298 | + *bytes |= (blocksize-1); | |
2299 | + (*bytes)++; | |
2301 | 2300 | } |
2302 | - return 0; | |
2303 | -out1: | |
2304 | - ClearPageUptodate(page); | |
2305 | - return status; | |
2306 | 2301 | |
2307 | -out_unmap: | |
2308 | - ClearPageUptodate(new_page); | |
2309 | - unlock_page(new_page); | |
2310 | - page_cache_release(new_page); | |
2302 | + *pagep = NULL; | |
2303 | + err = block_write_begin(file, mapping, pos, len, | |
2304 | + flags, pagep, fsdata, get_block); | |
2311 | 2305 | out: |
2312 | - return status; | |
2306 | + return err; | |
2313 | 2307 | } |
2314 | 2308 | |
2315 | 2309 | int block_prepare_write(struct page *page, unsigned from, unsigned to, |
... | ... | @@ -3191,7 +3185,7 @@ |
3191 | 3185 | EXPORT_SYMBOL(block_sync_page); |
3192 | 3186 | EXPORT_SYMBOL(block_truncate_page); |
3193 | 3187 | EXPORT_SYMBOL(block_write_full_page); |
3194 | -EXPORT_SYMBOL(cont_prepare_write); | |
3188 | +EXPORT_SYMBOL(cont_write_begin); | |
3195 | 3189 | EXPORT_SYMBOL(end_buffer_read_sync); |
3196 | 3190 | EXPORT_SYMBOL(end_buffer_write_sync); |
3197 | 3191 | EXPORT_SYMBOL(file_fsync); |
include/linux/buffer_head.h
... | ... | @@ -214,8 +214,9 @@ |
214 | 214 | struct page *, void *); |
215 | 215 | void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); |
216 | 216 | int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); |
217 | -int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, | |
218 | - loff_t *); | |
217 | +int cont_write_begin(struct file *, struct address_space *, loff_t, | |
218 | + unsigned, unsigned, struct page **, void **, | |
219 | + get_block_t *, loff_t *); | |
219 | 220 | int generic_cont_expand(struct inode *inode, loff_t size); |
220 | 221 | int generic_cont_expand_simple(struct inode *inode, loff_t size); |
221 | 222 | int block_commit_write(struct page *page, unsigned from, unsigned to); |
include/linux/fs.h
mm/filemap.c
... | ... | @@ -1684,6 +1684,7 @@ |
1684 | 1684 | |
1685 | 1685 | return copied; |
1686 | 1686 | } |
1687 | +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); | |
1687 | 1688 | |
1688 | 1689 | /* |
1689 | 1690 | * This has the same sideeffects and return value as |
... | ... | @@ -1710,6 +1711,7 @@ |
1710 | 1711 | kunmap(page); |
1711 | 1712 | return copied; |
1712 | 1713 | } |
1714 | +EXPORT_SYMBOL(iov_iter_copy_from_user); | |
1713 | 1715 | |
1714 | 1716 | static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes) |
1715 | 1717 | { |
... | ... | @@ -1741,6 +1743,7 @@ |
1741 | 1743 | __iov_iter_advance_iov(i, bytes); |
1742 | 1744 | i->count -= bytes; |
1743 | 1745 | } |
1746 | +EXPORT_SYMBOL(iov_iter_advance); | |
1744 | 1747 | |
1745 | 1748 | /* |
1746 | 1749 | * Fault in the first iovec of the given iov_iter, to a maximum length |
... | ... | @@ -1757,6 +1760,7 @@ |
1757 | 1760 | bytes = min(bytes, i->iov->iov_len - i->iov_offset); |
1758 | 1761 | return fault_in_pages_readable(buf, bytes); |
1759 | 1762 | } |
1763 | +EXPORT_SYMBOL(iov_iter_fault_in_readable); | |
1760 | 1764 | |
1761 | 1765 | /* |
1762 | 1766 | * Return the count of just the current iov_iter segment. |
... | ... | @@ -1769,6 +1773,7 @@ |
1769 | 1773 | else |
1770 | 1774 | return min(i->count, iov->iov_len - i->iov_offset); |
1771 | 1775 | } |
1776 | +EXPORT_SYMBOL(iov_iter_single_seg_count); | |
1772 | 1777 | |
1773 | 1778 | /* |
1774 | 1779 | * Performs necessary checks before doing a write |