Commit f6762b7ad8edd6abc802542ce845d3bc8adcb92f

Authored by Jens Axboe
1 parent e27dedd84c

[PATCH] pipe: enable atomic copying of pipe data to/from user space

The pipe ->map() method uses kmap() to virtually map the pages, which
is slow and has known scalability issues on SMP. This patch enables
atomic copying of pipe pages by pre-faulting the user memory and using
kmap_atomic() instead, falling back to the slow kmap() path if the
atomic copy faults.
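
In outline: pre-fault the user pages, map the pipe page with kmap_atomic(),
attempt the copy with the no-fault __copy_{to,from}_user_inatomic() variants,
and retry once through the sleeping kmap() path if the atomic copy still
faults. A minimal sketch of the read-side retry loop, condensed from the
fs/pipe.c changes below (the standalone framing here is illustrative only):

    int error, atomic;
    void *addr;

    /* Take the atomic path only if every user page faulted in cleanly. */
    atomic = !iov_fault_in_pages_write(iov, chars);
redo:
    addr = ops->map(pipe, buf, atomic);
    error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
    ops->unmap(pipe, buf, addr);
    if (unlikely(error)) {
        /*
         * The atomic copy can still fault, e.g. if the page was
         * reclaimed after the pre-fault; retry with the slow path.
         */
        if (atomic) {
            atomic = 0;
            goto redo;
        }
        /* A fault on the sleeping path is a genuine -EFAULT. */
    }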

lmbench bw_pipe and lat_pipe measurements agree this is a Good Thing. Here
are results from a UP machine with highmem (1.5GiB of RAM), running first
a vanilla UP kernel, then a vanilla SMP kernel, and finally the patched
SMP kernel.

Vanilla-UP:
Pipe bandwidth: 1622.28 MB/sec
Pipe bandwidth: 1610.59 MB/sec
Pipe bandwidth: 1608.30 MB/sec
Pipe latency: 7.3275 microseconds
Pipe latency: 7.2995 microseconds
Pipe latency: 7.3097 microseconds

Vanilla-SMP:
Pipe bandwidth: 1382.19 MB/sec
Pipe bandwidth: 1317.27 MB/sec
Pipe bandwidth: 1355.61 MB/sec
Pipe latency: 9.6402 microseconds
Pipe latency: 9.6696 microseconds
Pipe latency: 9.6153 microseconds

Patched-SMP:
Pipe bandwidth: 1578.70 MB/sec
Pipe bandwidth: 1579.95 MB/sec
Pipe bandwidth: 1578.63 MB/sec
Pipe latency: 9.1654 microseconds
Pipe latency: 9.2266 microseconds
Pipe latency: 9.1527 microseconds

Signed-off-by: Jens Axboe <axboe@suse.de>

3 changed files with 126 additions and 30 deletions

fs/pipe.c
@@ -55,7 +55,8 @@
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
+pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
+                        int atomic)
 {
         unsigned long copy;
 
@@ -64,8 +65,13 @@
                         iov++;
                 copy = min_t(unsigned long, len, iov->iov_len);
 
-                if (copy_from_user(to, iov->iov_base, copy))
-                        return -EFAULT;
+                if (atomic) {
+                        if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+                                return -EFAULT;
+                } else {
+                        if (copy_from_user(to, iov->iov_base, copy))
+                                return -EFAULT;
+                }
                 to += copy;
                 len -= copy;
                 iov->iov_base += copy;
@@ -75,7 +81,8 @@
 }
 
 static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
+pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
+                      int atomic)
 {
         unsigned long copy;
 
@@ -84,8 +91,13 @@
                         iov++;
                 copy = min_t(unsigned long, len, iov->iov_len);
 
-                if (copy_to_user(iov->iov_base, from, copy))
-                        return -EFAULT;
+                if (atomic) {
+                        if (__copy_to_user_inatomic(iov->iov_base, from, copy))
+                                return -EFAULT;
+                } else {
+                        if (copy_to_user(iov->iov_base, from, copy))
+                                return -EFAULT;
+                }
                 from += copy;
                 len -= copy;
                 iov->iov_base += copy;
@@ -94,6 +106,47 @@
         return 0;
 }
 
+/*
+ * Attempt to pre-fault in the user memory, so we can use atomic copies.
+ * Returns the number of bytes not faulted in.
+ */
+static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
+{
+        while (!iov->iov_len)
+                iov++;
+
+        while (len > 0) {
+                unsigned long this_len;
+
+                this_len = min_t(unsigned long, len, iov->iov_len);
+                if (fault_in_pages_writeable(iov->iov_base, this_len))
+                        break;
+
+                len -= this_len;
+                iov++;
+        }
+
+        return len;
+}
+
+/*
+ * Pre-fault in the user memory, so we can use atomic copies.
+ */
+static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
+{
+        while (!iov->iov_len)
+                iov++;
+
+        while (len > 0) {
+                unsigned long this_len;
+
+                this_len = min_t(unsigned long, len, iov->iov_len);
+                fault_in_pages_readable(iov->iov_base, this_len);
+                len -= this_len;
+                iov++;
+        }
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
 {
@@ -111,15 +164,24 @@
 }
 
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
-                           struct pipe_buffer *buf)
+                           struct pipe_buffer *buf, int atomic)
 {
+        if (atomic) {
+                buf->flags |= PIPE_BUF_FLAG_ATOMIC;
+                return kmap_atomic(buf->page, KM_USER0);
+        }
+
         return kmap(buf->page);
 }
 
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
-                            struct pipe_buffer *buf)
+                            struct pipe_buffer *buf, void *map_data)
 {
-        kunmap(buf->page);
+        if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
+                buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+                kunmap_atomic(map_data, KM_USER0);
+        } else
+                kunmap(buf->page);
 }
 
 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -183,7 +245,7 @@
                         struct pipe_buf_operations *ops = buf->ops;
                         void *addr;
                         size_t chars = buf->len;
-                        int error;
+                        int error, atomic;
 
                         if (chars > total_len)
                                 chars = total_len;
@@ -195,12 +257,21 @@
                                 break;
                         }
 
-                        addr = ops->map(pipe, buf);
-                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
-                        ops->unmap(pipe, buf);
+                        atomic = !iov_fault_in_pages_write(iov, chars);
+redo:
+                        addr = ops->map(pipe, buf, atomic);
+                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
+                        ops->unmap(pipe, buf, addr);
                         if (unlikely(error)) {
+                                /*
+                                 * Just retry with the slow path if we failed.
+                                 */
+                                if (atomic) {
+                                        atomic = 0;
+                                        goto redo;
+                                }
                                 if (!ret)
-                                        ret = -EFAULT;
+                                        ret = error;
                                 break;
                         }
                         ret += chars;
@@ -304,21 +375,28 @@
                 int offset = buf->offset + buf->len;
 
                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
+                        int error, atomic = 1;
                         void *addr;
-                        int error;
 
                         error = ops->pin(pipe, buf);
                         if (error)
                                 goto out;
 
-                        addr = ops->map(pipe, buf);
+                        iov_fault_in_pages_read(iov, chars);
+redo1:
+                        addr = ops->map(pipe, buf, atomic);
                         error = pipe_iov_copy_from_user(offset + addr, iov,
-                                                        chars);
-                        ops->unmap(pipe, buf);
+                                                        chars, atomic);
+                        ops->unmap(pipe, buf, addr);
                         ret = error;
                         do_wakeup = 1;
-                        if (error)
+                        if (error) {
+                                if (atomic) {
+                                        atomic = 0;
+                                        goto redo1;
+                                }
                                 goto out;
+                        }
                         buf->len += chars;
                         total_len -= chars;
                         ret = chars;
@@ -341,7 +419,8 @@
                         int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
                         struct pipe_buffer *buf = pipe->bufs + newbuf;
                         struct page *page = pipe->tmp_page;
-                        int error;
+                        char *src;
+                        int error, atomic = 1;
 
                         if (!page) {
                                 page = alloc_page(GFP_HIGHUSER);
@@ -361,11 +440,27 @@
                         if (chars > total_len)
                                 chars = total_len;
 
-                        error = pipe_iov_copy_from_user(kmap(page), iov, chars);
-                        kunmap(page);
+                        iov_fault_in_pages_read(iov, chars);
+redo2:
+                        if (atomic)
+                                src = kmap_atomic(page, KM_USER0);
+                        else
+                                src = kmap(page);
+
+                        error = pipe_iov_copy_from_user(src, iov, chars,
+                                                        atomic);
+                        if (atomic)
+                                kunmap_atomic(src, KM_USER0);
+                        else
+                                kunmap(page);
+
                         if (unlikely(error)) {
+                                if (atomic) {
+                                        atomic = 0;
+                                        goto redo2;
+                                }
                                 if (!ret)
-                                        ret = -EFAULT;
+                                        ret = error;
                                 break;
                         }
                         ret += chars;
fs/splice.c
@@ -640,13 +640,13 @@
                 /*
                  * Careful, ->map() uses KM_USER0!
                  */
-                char *src = buf->ops->map(info, buf);
+                char *src = buf->ops->map(info, buf, 1);
                 char *dst = kmap_atomic(page, KM_USER1);
 
                 memcpy(dst + offset, src + buf->offset, this_len);
                 flush_dcache_page(page);
                 kunmap_atomic(dst, KM_USER1);
-                buf->ops->unmap(info, buf);
+                buf->ops->unmap(info, buf, src);
         }
 
         ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
include/linux/pipe_fs_i.h
@@ -5,7 +5,8 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_LRU       0x01
+#define PIPE_BUF_FLAG_LRU       0x01    /* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC    0x02    /* was atomically mapped */
 
 struct pipe_buffer {
         struct page *page;
@@ -28,8 +29,8 @@
  */
 struct pipe_buf_operations {
         int can_merge;
-        void * (*map)(struct pipe_inode_info *, struct pipe_buffer *);
-        void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
+        void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
+        void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
         int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
         void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
         int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -64,8 +65,8 @@
 void __free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
-void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *);
-void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *);
+void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
+void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
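
With the new signatures, a ->map() caller chooses the atomic or sleeping path
up front and must hand the returned mapping back to ->unmap(). A minimal
sketch of the non-atomic calling convention (pipe, buf and the dest buffer
are assumed to exist; they are illustrative, not from the patch):

    /* atomic == 0: kmap() may sleep, so this is only valid in process context. */
    void *addr = buf->ops->map(pipe, buf, 0);

    memcpy(dest, addr + buf->offset, buf->len);

    /*
     * ->unmap() now takes the mapping so that the atomic case can
     * kunmap_atomic() it; the kmap() case ignores the argument.
     */
    buf->ops->unmap(pipe, buf, addr);

With atomic == 1 the caller must not sleep between ->map() and ->unmap(), and
any user-space copy must use the __copy_*_inatomic() variants; kernel-to-kernel
memcpy() is fine, which is why the fs/splice.c hunk above can map with
atomic == 1 while copying between two kmap_atomic() slots.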