Commit 89e107877b65bf6eff1d63a1302dee9a091586f5

Authored by Nick Piggin
Committed by Linus Torvalds
1 parent 7765ec26ae

fs: new cont helpers

Rework the generic block "cont" routines to handle the new aops.  Supporting
cont_prepare_write alongside them would take quite a lot of code, so remove it
instead (the filesystems that used it are later converted to cont_write_begin).

write_begin gets passed AOP_FLAG_CONT_EXPAND when called from
generic_cont_expand, so filesystems can avoid the old hacks they used.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 103 additions and 102 deletions Side-by-side Diff

fs/buffer.c
... ... @@ -2156,14 +2156,14 @@
2156 2156 }
2157 2157  
2158 2158 /* utility function for filesystems that need to do work on expanding
2159   - * truncates. Uses prepare/commit_write to allow the filesystem to
  2159 + * truncates. Uses filesystem pagecache writes to allow the filesystem to
2160 2160 * deal with the hole.
2161 2161 */
2162   -static int __generic_cont_expand(struct inode *inode, loff_t size,
2163   - pgoff_t index, unsigned int offset)
  2162 +int generic_cont_expand_simple(struct inode *inode, loff_t size)
2164 2163 {
2165 2164 struct address_space *mapping = inode->i_mapping;
2166 2165 struct page *page;
  2166 + void *fsdata;
2167 2167 unsigned long limit;
2168 2168 int err;
2169 2169  
2170 2170  
2171 2171  
2172 2172  
2173 2173  
2174 2174  
2175 2175  
2176 2176  
2177 2177  
2178 2178  
2179 2179  
2180 2180  
2181 2181  
2182 2182  
2183 2183  
2184 2184  
2185 2185  
2186 2186  
2187 2187  
2188 2188  
2189 2189  
2190 2190  
... ... @@ -2176,140 +2176,134 @@
2176 2176 if (size > inode->i_sb->s_maxbytes)
2177 2177 goto out;
2178 2178  
2179   - err = -ENOMEM;
2180   - page = grab_cache_page(mapping, index);
2181   - if (!page)
  2179 + err = pagecache_write_begin(NULL, mapping, size, 0,
  2180 + AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
  2181 + &page, &fsdata);
  2182 + if (err)
2182 2183 goto out;
2183   - err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
2184   - if (err) {
2185   - /*
2186   - * ->prepare_write() may have instantiated a few blocks
2187   - * outside i_size. Trim these off again.
2188   - */
2189   - unlock_page(page);
2190   - page_cache_release(page);
2191   - vmtruncate(inode, inode->i_size);
2192   - goto out;
2193   - }
2194 2184  
2195   - err = mapping->a_ops->commit_write(NULL, page, offset, offset);
  2185 + err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
  2186 + BUG_ON(err > 0);
2196 2187  
2197   - unlock_page(page);
2198   - page_cache_release(page);
2199   - if (err > 0)
2200   - err = 0;
2201 2188 out:
2202 2189 return err;
2203 2190 }
2204 2191  
2205 2192 int generic_cont_expand(struct inode *inode, loff_t size)
2206 2193 {
2207   - pgoff_t index;
2208 2194 unsigned int offset;
2209 2195  
2210 2196 offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
2211 2197  
2212 2198 /* ugh. in prepare/commit_write, if from==to==start of block, we
2213   - ** skip the prepare. make sure we never send an offset for the start
2214   - ** of a block
2215   - */
  2199 + * skip the prepare. make sure we never send an offset for the start
  2200 + * of a block.
  2201 + * XXX: actually, this should be handled in those filesystems by
  2202 + * checking for the AOP_FLAG_CONT_EXPAND flag.
  2203 + */
2216 2204 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
2217 2205 /* caller must handle this extra byte. */
2218   - offset++;
  2206 + size++;
2219 2207 }
2220   - index = size >> PAGE_CACHE_SHIFT;
2221   -
2222   - return __generic_cont_expand(inode, size, index, offset);
  2208 + return generic_cont_expand_simple(inode, size);
2223 2209 }
2224 2210  
2225   -int generic_cont_expand_simple(struct inode *inode, loff_t size)
  2211 +int cont_expand_zero(struct file *file, struct address_space *mapping,
  2212 + loff_t pos, loff_t *bytes)
2226 2213 {
2227   - loff_t pos = size - 1;
2228   - pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2229   - unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
2230   -
2231   - /* prepare/commit_write can handle even if from==to==start of block. */
2232   - return __generic_cont_expand(inode, size, index, offset);
2233   -}
2234   -
2235   -/*
2236   - * For moronic filesystems that do not allow holes in file.
2237   - * We may have to extend the file.
2238   - */
2239   -
2240   -int cont_prepare_write(struct page *page, unsigned offset,
2241   - unsigned to, get_block_t *get_block, loff_t *bytes)
2242   -{
2243   - struct address_space *mapping = page->mapping;
2244 2214 struct inode *inode = mapping->host;
2245   - struct page *new_page;
2246   - pgoff_t pgpos;
2247   - long status;
2248   - unsigned zerofrom;
2249 2215 unsigned blocksize = 1 << inode->i_blkbits;
  2216 + struct page *page;
  2217 + void *fsdata;
  2218 + pgoff_t index, curidx;
  2219 + loff_t curpos;
  2220 + unsigned zerofrom, offset, len;
  2221 + int err = 0;
2250 2222  
2251   - while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
2252   - status = -ENOMEM;
2253   - new_page = grab_cache_page(mapping, pgpos);
2254   - if (!new_page)
2255   - goto out;
2256   - /* we might sleep */
2257   - if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
2258   - unlock_page(new_page);
2259   - page_cache_release(new_page);
2260   - continue;
2261   - }
2262   - zerofrom = *bytes & ~PAGE_CACHE_MASK;
  2223 + index = pos >> PAGE_CACHE_SHIFT;
  2224 + offset = pos & ~PAGE_CACHE_MASK;
  2225 +
  2226 + while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
  2227 + zerofrom = curpos & ~PAGE_CACHE_MASK;
2263 2228 if (zerofrom & (blocksize-1)) {
2264 2229 *bytes |= (blocksize-1);
2265 2230 (*bytes)++;
2266 2231 }
2267   - status = __block_prepare_write(inode, new_page, zerofrom,
2268   - PAGE_CACHE_SIZE, get_block);
2269   - if (status)
2270   - goto out_unmap;
2271   - zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
2272   - KM_USER0);
2273   - generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
2274   - unlock_page(new_page);
2275   - page_cache_release(new_page);
  2232 + len = PAGE_CACHE_SIZE - zerofrom;
  2233 +
  2234 + err = pagecache_write_begin(file, mapping, curpos, len,
  2235 + AOP_FLAG_UNINTERRUPTIBLE,
  2236 + &page, &fsdata);
  2237 + if (err)
  2238 + goto out;
  2239 + zero_user_page(page, zerofrom, len, KM_USER0);
  2240 + err = pagecache_write_end(file, mapping, curpos, len, len,
  2241 + page, fsdata);
  2242 + if (err < 0)
  2243 + goto out;
  2244 + BUG_ON(err != len);
  2245 + err = 0;
2276 2246 }
2277 2247  
2278   - if (page->index < pgpos) {
2279   - /* completely inside the area */
2280   - zerofrom = offset;
2281   - } else {
2282   - /* page covers the boundary, find the boundary offset */
2283   - zerofrom = *bytes & ~PAGE_CACHE_MASK;
2284   -
  2248 + /* page covers the boundary, find the boundary offset */
  2249 + if (index == curidx) {
  2250 + zerofrom = curpos & ~PAGE_CACHE_MASK;
2285 2251 /* if we will expand the thing last block will be filled */
2286   - if (to > zerofrom && (zerofrom & (blocksize-1))) {
  2252 + if (offset <= zerofrom) {
  2253 + goto out;
  2254 + }
  2255 + if (zerofrom & (blocksize-1)) {
2287 2256 *bytes |= (blocksize-1);
2288 2257 (*bytes)++;
2289 2258 }
  2259 + len = offset - zerofrom;
2290 2260  
2291   - /* starting below the boundary? Nothing to zero out */
2292   - if (offset <= zerofrom)
2293   - zerofrom = offset;
  2261 + err = pagecache_write_begin(file, mapping, curpos, len,
  2262 + AOP_FLAG_UNINTERRUPTIBLE,
  2263 + &page, &fsdata);
  2264 + if (err)
  2265 + goto out;
  2266 + zero_user_page(page, zerofrom, len, KM_USER0);
  2267 + err = pagecache_write_end(file, mapping, curpos, len, len,
  2268 + page, fsdata);
  2269 + if (err < 0)
  2270 + goto out;
  2271 + BUG_ON(err != len);
  2272 + err = 0;
2294 2273 }
2295   - status = __block_prepare_write(inode, page, zerofrom, to, get_block);
2296   - if (status)
2297   - goto out1;
2298   - if (zerofrom < offset) {
2299   - zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
2300   - __block_commit_write(inode, page, zerofrom, offset);
  2274 +out:
  2275 + return err;
  2276 +}
  2277 +
  2278 +/*
  2279 + * For moronic filesystems that do not allow holes in file.
  2280 + * We may have to extend the file.
  2281 + */
  2282 +int cont_write_begin(struct file *file, struct address_space *mapping,
  2283 + loff_t pos, unsigned len, unsigned flags,
  2284 + struct page **pagep, void **fsdata,
  2285 + get_block_t *get_block, loff_t *bytes)
  2286 +{
  2287 + struct inode *inode = mapping->host;
  2288 + unsigned blocksize = 1 << inode->i_blkbits;
  2289 + unsigned zerofrom;
  2290 + int err;
  2291 +
  2292 + err = cont_expand_zero(file, mapping, pos, bytes);
  2293 + if (err)
  2294 + goto out;
  2295 +
  2296 + zerofrom = *bytes & ~PAGE_CACHE_MASK;
  2297 + if (pos+len > *bytes && zerofrom & (blocksize-1)) {
  2298 + *bytes |= (blocksize-1);
  2299 + (*bytes)++;
2301 2300 }
2302   - return 0;
2303   -out1:
2304   - ClearPageUptodate(page);
2305   - return status;
2306 2301  
2307   -out_unmap:
2308   - ClearPageUptodate(new_page);
2309   - unlock_page(new_page);
2310   - page_cache_release(new_page);
  2302 + *pagep = NULL;
  2303 + err = block_write_begin(file, mapping, pos, len,
  2304 + flags, pagep, fsdata, get_block);
2311 2305 out:
2312   - return status;
  2306 + return err;
2313 2307 }
2314 2308  
2315 2309 int block_prepare_write(struct page *page, unsigned from, unsigned to,
... ... @@ -3191,7 +3185,7 @@
3191 3185 EXPORT_SYMBOL(block_sync_page);
3192 3186 EXPORT_SYMBOL(block_truncate_page);
3193 3187 EXPORT_SYMBOL(block_write_full_page);
3194   -EXPORT_SYMBOL(cont_prepare_write);
  3188 +EXPORT_SYMBOL(cont_write_begin);
3195 3189 EXPORT_SYMBOL(end_buffer_read_sync);
3196 3190 EXPORT_SYMBOL(end_buffer_write_sync);
3197 3191 EXPORT_SYMBOL(file_fsync);
include/linux/buffer_head.h
... ... @@ -214,8 +214,9 @@
214 214 struct page *, void *);
215 215 void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
216 216 int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
217   -int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
218   - loff_t *);
  217 +int cont_write_begin(struct file *, struct address_space *, loff_t,
  218 + unsigned, unsigned, struct page **, void **,
  219 + get_block_t *, loff_t *);
219 220 int generic_cont_expand(struct inode *inode, loff_t size);
220 221 int generic_cont_expand_simple(struct inode *inode, loff_t size);
221 222 int block_commit_write(struct page *page, unsigned from, unsigned to);
include/linux/fs.h
... ... @@ -395,6 +395,7 @@
395 395 };
396 396  
397 397 #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
  398 +#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
398 399  
399 400 /*
400 401 * oh the beauties of C type declarations.
mm/filemap.c
... ... @@ -1684,6 +1684,7 @@
1684 1684  
1685 1685 return copied;
1686 1686 }
  1687 +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1687 1688  
1688 1689 /*
1689 1690 * This has the same sideeffects and return value as
... ... @@ -1710,6 +1711,7 @@
1710 1711 kunmap(page);
1711 1712 return copied;
1712 1713 }
  1714 +EXPORT_SYMBOL(iov_iter_copy_from_user);
1713 1715  
1714 1716 static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
1715 1717 {
... ... @@ -1741,6 +1743,7 @@
1741 1743 __iov_iter_advance_iov(i, bytes);
1742 1744 i->count -= bytes;
1743 1745 }
  1746 +EXPORT_SYMBOL(iov_iter_advance);
1744 1747  
1745 1748 /*
1746 1749 * Fault in the first iovec of the given iov_iter, to a maximum length
... ... @@ -1757,6 +1760,7 @@
1757 1760 bytes = min(bytes, i->iov->iov_len - i->iov_offset);
1758 1761 return fault_in_pages_readable(buf, bytes);
1759 1762 }
  1763 +EXPORT_SYMBOL(iov_iter_fault_in_readable);
1760 1764  
1761 1765 /*
1762 1766 * Return the count of just the current iov_iter segment.
... ... @@ -1769,6 +1773,7 @@
1769 1773 else
1770 1774 return min(i->count, iov->iov_len - i->iov_offset);
1771 1775 }
  1776 +EXPORT_SYMBOL(iov_iter_single_seg_count);
1772 1777  
1773 1778 /*
1774 1779 * Performs necessary checks before doing a write