Commit 9cd684551124e71630ab96d238747051463f5b56

Authored by Miklos Szeredi
Committed by Linus Torvalds
1 parent caf736085f

[PATCH] fuse: fix async read for legacy filesystems

While asynchronous reads mean a performance improvement in most cases, if
the filesystem assumed that reads are synchronous, then async reads may
degrade performance (filesystem may receive reads out of order, which can
confuse its own readahead logic).

With sshfs a 1.5 to 4 times slowdown can be measured.

There's also a need for userspace filesystems to know whether asynchronous
reads are supported by the kernel or not.

To achieve these goals, negotiate in the INIT request whether async reads
will be used and the maximum readahead value.  Update interface version to 7.6.

If userspace uses a version earlier than 7.6, then disable async reads, and
set maximum readahead value to the maximum read size, as done in previous
versions.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Showing 4 changed files with 36 additions and 6 deletions Side-by-side Diff

... ... @@ -335,9 +335,14 @@
335 335 loff_t pos = page_offset(req->pages[0]);
336 336 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
337 337 req->out.page_zeroing = 1;
338   - req->end = fuse_readpages_end;
339 338 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
340   - request_send_background(fc, req);
  339 + if (fc->async_read) {
  340 + req->end = fuse_readpages_end;
  341 + request_send_background(fc, req);
  342 + } else {
  343 + request_send(fc, req);
  344 + fuse_readpages_end(fc, req);
  345 + }
341 346 }
342 347  
343 348 struct fuse_readpages_data {
... ... @@ -272,6 +272,9 @@
272 272 reply, before any other request, and never cleared */
273 273 unsigned conn_error : 1;
274 274  
  275 + /** Do readpages asynchronously? Only set in INIT */
  276 + unsigned async_read : 1;
  277 +
275 278 /*
276 279 * The following bitfields are only for optimization purposes
277 280 * and hence races in setting them will not cause malfunction
... ... @@ -473,6 +473,16 @@
473 473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
474 474 fc->conn_error = 1;
475 475 else {
  476 + unsigned long ra_pages;
  477 +
  478 + if (arg->minor >= 6) {
  479 + ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
  480 + if (arg->flags & FUSE_ASYNC_READ)
  481 + fc->async_read = 1;
  482 + } else
  483 + ra_pages = fc->max_read / PAGE_CACHE_SIZE;
  484 +
  485 + fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
476 486 fc->minor = arg->minor;
477 487 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
478 488 }
... ... @@ -496,6 +506,8 @@
496 506  
497 507 arg->major = FUSE_KERNEL_VERSION;
498 508 arg->minor = FUSE_KERNEL_MINOR_VERSION;
  509 + arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
  510 + arg->flags |= FUSE_ASYNC_READ;
499 511 req->in.h.opcode = FUSE_INIT;
500 512 req->in.numargs = 1;
501 513 req->in.args[0].size = sizeof(*arg);
... ... @@ -552,8 +564,6 @@
552 564 fc->user_id = d.user_id;
553 565 fc->group_id = d.group_id;
554 566 fc->max_read = d.max_read;
555   - if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
556   - fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
557 567  
558 568 /* Used by get_root_inode() */
559 569 sb->s_fs_info = fc;
include/linux/fuse.h
... ... @@ -14,7 +14,7 @@
14 14 #define FUSE_KERNEL_VERSION 7
15 15  
16 16 /** Minor version number of this interface */
17   -#define FUSE_KERNEL_MINOR_VERSION 5
  17 +#define FUSE_KERNEL_MINOR_VERSION 6
18 18  
19 19 /** The node ID of the root inode */
20 20 #define FUSE_ROOT_ID 1
... ... @@ -58,6 +58,9 @@
58 58 __u32 spare[6];
59 59 };
60 60  
  61 +/**
  62 + * Bitmasks for fuse_setattr_in.valid
  63 + */
61 64 #define FATTR_MODE (1 << 0)
62 65 #define FATTR_UID (1 << 1)
63 66 #define FATTR_GID (1 << 2)
... ... @@ -75,6 +78,11 @@
75 78 #define FOPEN_DIRECT_IO (1 << 0)
76 79 #define FOPEN_KEEP_CACHE (1 << 1)
77 80  
  81 +/**
  82 + * INIT request/reply flags
  83 + */
  84 +#define FUSE_ASYNC_READ (1 << 0)
  85 +
78 86 enum fuse_opcode {
79 87 FUSE_LOOKUP = 1,
80 88 FUSE_FORGET = 2, /* no reply */
81 89  
... ... @@ -247,12 +255,16 @@
247 255 struct fuse_init_in {
248 256 __u32 major;
249 257 __u32 minor;
  258 + __u32 max_readahead;
  259 + __u32 flags;
250 260 };
251 261  
252 262 struct fuse_init_out {
253 263 __u32 major;
254 264 __u32 minor;
255   - __u32 unused[3];
  265 + __u32 max_readahead;
  266 + __u32 flags;
  267 + __u32 unused;
256 268 __u32 max_write;
257 269 };
258 270