Commit 413ef8cb302511d8e995e2b0e5517ee1a65b9c77

Authored by Miklos Szeredi
Committed by Linus Torvalds
1 parent 5a53368277

[PATCH] FUSE - direct I/O

This patch adds support for the "direct_io" mount option of FUSE.

When this mount option is specified, the page cache is bypassed for
read and write operations.  This is useful, for example, if the
filesystem does not know the size of a file before reading it, or when
any kind of caching is harmful.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 3 changed files with 146 additions and 2 deletions Side-by-side Diff

... ... @@ -363,6 +363,118 @@
363 363 return err;
364 364 }
365 365  
  366 +static void fuse_release_user_pages(struct fuse_req *req, int write)
  367 +{
  368 + unsigned i;
  369 +
  370 + for (i = 0; i < req->num_pages; i++) {
  371 + struct page *page = req->pages[i];
  372 + if (write)
  373 + set_page_dirty_lock(page);
  374 + put_page(page);
  375 + }
  376 +}
  377 +
  378 +static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
  379 + unsigned nbytes, int write)
  380 +{
  381 + unsigned long user_addr = (unsigned long) buf;
  382 + unsigned offset = user_addr & ~PAGE_MASK;
  383 + int npages;
  384 +
  385 + /* This doesn't work with nfsd */
  386 + if (!current->mm)
  387 + return -EPERM;
  388 +
  389 + nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
  390 + npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
  391 + npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
  392 + down_read(&current->mm->mmap_sem);
  393 + npages = get_user_pages(current, current->mm, user_addr, npages, write,
  394 + 0, req->pages, NULL);
  395 + up_read(&current->mm->mmap_sem);
  396 + if (npages < 0)
  397 + return npages;
  398 +
  399 + req->num_pages = npages;
  400 + req->page_offset = offset;
  401 + return 0;
  402 +}
  403 +
/*
 * Common implementation of direct (page-cache-bypassing) read and
 * write.  Transfers up to @count bytes at *@ppos in chunks of at most
 * fc->max_write (writes) or fc->max_read (reads), pinning the user
 * pages for each chunk and releasing them after the request completes.
 *
 * Returns the number of bytes transferred, or a negative errno.  On a
 * successful (partial or full) transfer *@ppos is advanced; a write
 * that extends the file updates i_size.
 */
static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
			      size_t count, loff_t *ppos, int write)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	size_t nmax = write ? fc->max_write : fc->max_read;
	loff_t pos = *ppos;
	ssize_t res = 0;
	struct fuse_req *req = fuse_get_request(fc);
	if (!req)
		return -ERESTARTSYS;

	while (count) {
		size_t tmp;
		size_t nres;
		size_t nbytes = min(count, nmax);
		/*
		 * !write: for a READ the pinned pages are written to,
		 * so get_user_pages needs write access, and vice versa.
		 */
		int err = fuse_get_user_pages(req, buf, nbytes, !write);
		if (err) {
			res = err;
			break;
		}
		/* Limit the chunk to the bytes actually pinned */
		tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
		nbytes = min(nbytes, tmp);
		if (write)
			nres = fuse_send_write(req, file, inode, pos, nbytes);
		else
			nres = fuse_send_read(req, file, inode, pos, nbytes);
		fuse_release_user_pages(req, !write);
		if (req->out.h.error) {
			/* Report the error only if nothing was transferred */
			if (!res)
				res = req->out.h.error;
			break;
		} else if (nres > nbytes) {
			/* Filesystem returned more than requested: bogus */
			res = -EIO;
			break;
		}
		count -= nres;
		res += nres;
		pos += nres;
		buf += nres;
		/* Short transfer: stop (e.g. EOF on read) */
		if (nres != nbytes)
			break;
		if (count)
			fuse_reset_request(req);
	}
	fuse_put_request(fc, req);
	if (res > 0) {
		/* A write past the current size extends the file */
		if (write && pos > i_size_read(inode))
			i_size_write(inode, pos);
		*ppos = pos;
	} else if (write && (res == -EINTR || res == -EIO))
		/* The write may have partially succeeded: cached
		   attributes can no longer be trusted */
		fuse_invalidate_attr(inode);

	return res;
}
  459 +
  460 +static ssize_t fuse_direct_read(struct file *file, char __user *buf,
  461 + size_t count, loff_t *ppos)
  462 +{
  463 + return fuse_direct_io(file, buf, count, ppos, 0);
  464 +}
  465 +
  466 +static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
  467 + size_t count, loff_t *ppos)
  468 +{
  469 + struct inode *inode = file->f_dentry->d_inode;
  470 + ssize_t res;
  471 + /* Don't allow parallel writes to the same file */
  472 + down(&inode->i_sem);
  473 + res = fuse_direct_io(file, buf, count, ppos, 1);
  474 + up(&inode->i_sem);
  475 + return res;
  476 +}
  477 +
366 478 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
367 479 {
368 480 if ((vma->vm_flags & VM_SHARED)) {
... ... @@ -393,6 +505,17 @@
393 505 .sendfile = generic_file_sendfile,
394 506 };
395 507  
/*
 * File operations installed when the "direct_io" mount option is
 * active: read/write go straight to the filesystem via
 * fuse_direct_read/fuse_direct_write instead of through the page
 * cache.  mmap and sendfile are deliberately absent — both depend on
 * the page cache, which this mode bypasses.
 */
static struct file_operations fuse_direct_io_file_operations = {
	.llseek = generic_file_llseek,
	.read = fuse_direct_read,
	.write = fuse_direct_write,
	.open = fuse_open,
	.flush = fuse_flush,
	.release = fuse_release,
	.fsync = fuse_fsync,
	/* no mmap and sendfile */
};
  518 +
396 519 static struct address_space_operations fuse_file_aops = {
397 520 .readpage = fuse_readpage,
398 521 .prepare_write = fuse_prepare_write,
... ... @@ -403,7 +526,13 @@
403 526  
404 527 void fuse_init_file_inode(struct inode *inode)
405 528 {
406   - inode->i_fop = &fuse_file_operations;
407   - inode->i_data.a_ops = &fuse_file_aops;
  529 + struct fuse_conn *fc = get_fuse_conn(inode);
  530 +
  531 + if (fc->flags & FUSE_DIRECT_IO)
  532 + inode->i_fop = &fuse_direct_io_file_operations;
  533 + else {
  534 + inode->i_fop = &fuse_file_operations;
  535 + inode->i_data.a_ops = &fuse_file_aops;
  536 + }
408 537 }
... ... @@ -34,6 +34,9 @@
34 34 be flushed on open */
35 35 #define FUSE_KERNEL_CACHE (1 << 2)
36 36  
  37 +/** Bypass the page cache for read and write operations */
  38 +#define FUSE_DIRECT_IO (1 << 3)
  39 +
37 40 /** FUSE inode */
38 41 struct fuse_inode {
39 42 /** Inode data */
... ... @@ -206,6 +209,9 @@
206 209  
207 210 /** Maximum read size */
208 211 unsigned max_read;
  212 +
  213 + /** Maximum write size */
  214 + unsigned max_write;
209 215  
210 216 /** Readers of the connection are waiting on this */
211 217 wait_queue_head_t waitq;
... ... @@ -258,6 +258,7 @@
258 258 OPT_DEFAULT_PERMISSIONS,
259 259 OPT_ALLOW_OTHER,
260 260 OPT_KERNEL_CACHE,
  261 + OPT_DIRECT_IO,
261 262 OPT_MAX_READ,
262 263 OPT_ERR
263 264 };
... ... @@ -270,6 +271,7 @@
270 271 {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
271 272 {OPT_ALLOW_OTHER, "allow_other"},
272 273 {OPT_KERNEL_CACHE, "kernel_cache"},
  274 + {OPT_DIRECT_IO, "direct_io"},
273 275 {OPT_MAX_READ, "max_read=%u"},
274 276 {OPT_ERR, NULL}
275 277 };
... ... @@ -329,6 +331,10 @@
329 331 d->flags |= FUSE_KERNEL_CACHE;
330 332 break;
331 333  
  334 + case OPT_DIRECT_IO:
  335 + d->flags |= FUSE_DIRECT_IO;
  336 + break;
  337 +
332 338 case OPT_MAX_READ:
333 339 if (match_int(&args[0], &value))
334 340 return 0;
... ... @@ -359,6 +365,8 @@
359 365 seq_puts(m, ",allow_other");
360 366 if (fc->flags & FUSE_KERNEL_CACHE)
361 367 seq_puts(m, ",kernel_cache");
  368 + if (fc->flags & FUSE_DIRECT_IO)
  369 + seq_puts(m, ",direct_io");
362 370 if (fc->max_read != ~0)
363 371 seq_printf(m, ",max_read=%u", fc->max_read);
364 372 return 0;
... ... @@ -489,6 +497,7 @@
489 497 fc->max_read = d.max_read;
490 498 if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
491 499 fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
  500 + fc->max_write = FUSE_MAX_IN / 2;
492 501  
493 502 err = -ENOMEM;
494 503 root = get_root_inode(sb, d.rootmode);