Commit f6762b7ad8edd6abc802542ce845d3bc8adcb92f
1 parent: e27dedd84c
[PATCH] pipe: enable atomic copying of pipe data to/from user space
The pipe ->map() method uses kmap() to virtually map the pages, which is both slow and has known scalability issues on SMP. This patch enables atomic copying of pipe pages by pre-faulting data and using kmap_atomic() instead.

lmbench bw_pipe and lat_pipe measurements agree this is a Good Thing. Here are results on a UP machine with highmem (1.5GiB of RAM), running first a UP kernel, then an SMP kernel, and finally the patched SMP kernel:

Vanilla-UP:
Pipe bandwidth: 1622.28 MB/sec
Pipe bandwidth: 1610.59 MB/sec
Pipe bandwidth: 1608.30 MB/sec
Pipe latency: 7.3275 microseconds
Pipe latency: 7.2995 microseconds
Pipe latency: 7.3097 microseconds

Vanilla-SMP:
Pipe bandwidth: 1382.19 MB/sec
Pipe bandwidth: 1317.27 MB/sec
Pipe bandwidth: 1355.61 MB/sec
Pipe latency: 9.6402 microseconds
Pipe latency: 9.6696 microseconds
Pipe latency: 9.6153 microseconds

Patched-SMP:
Pipe bandwidth: 1578.70 MB/sec
Pipe bandwidth: 1579.95 MB/sec
Pipe bandwidth: 1578.63 MB/sec
Pipe latency: 9.1654 microseconds
Pipe latency: 9.2266 microseconds
Pipe latency: 9.1527 microseconds

Signed-off-by: Jens Axboe <axboe@suse.de>
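Every copy site in this patch follows the same shape: pre-fault the user pages, attempt an atomic mapping plus an "_inatomic" copy, and fall back to the sleeping kmap() path only if the copy still faults. Here is a condensed, self-contained sketch of that pattern; the helper name copy_to_pipe_page is invented for illustration and is not code from the patch:

#include <linux/highmem.h>      /* kmap(), kmap_atomic() */
#include <linux/pagemap.h>      /* fault_in_pages_readable() */
#include <asm/uaccess.h>        /* copy_from_user() and the inatomic variant */

/*
 * Illustrative helper: copy 'len' bytes of user data into a pipe page,
 * preferring the cheap atomic mapping.
 */
static int copy_to_pipe_page(struct page *page, const char __user *ubuf,
                             size_t len)
{
        char *dst;
        int atomic = 1;

        /* Touch the user pages first, so the atomic copy rarely faults. */
        fault_in_pages_readable(ubuf, len);
redo:
        dst = atomic ? kmap_atomic(page, KM_USER0) : kmap(page);

        if (atomic ? __copy_from_user_inatomic(dst, ubuf, len)
                   : copy_from_user(dst, ubuf, len)) {
                if (atomic) {
                        kunmap_atomic(dst, KM_USER0);
                        /* Pre-faulting is only advisory; retry with the
                         * sleeping kmap()/copy_from_user() path. */
                        atomic = 0;
                        goto redo;
                }
                kunmap(page);
                return -EFAULT;
        }

        if (atomic)
                kunmap_atomic(dst, KM_USER0);
        else
                kunmap(page);
        return 0;
}

Note that pre-faulting cannot guarantee success: the user page may be reclaimed between the fault-in and the copy, so the atomic copy can still fail and the slow path has to stay.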
Showing 3 changed files with 126 additions and 30 deletions
fs/pipe.c
@@ -55,7 +55,8 @@
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
+pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
+                        int atomic)
 {
         unsigned long copy;
 
@@ -64,8 +65,13 @@
                         iov++;
                 copy = min_t(unsigned long, len, iov->iov_len);
 
-                if (copy_from_user(to, iov->iov_base, copy))
-                        return -EFAULT;
+                if (atomic) {
+                        if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+                                return -EFAULT;
+                } else {
+                        if (copy_from_user(to, iov->iov_base, copy))
+                                return -EFAULT;
+                }
                 to += copy;
                 len -= copy;
                 iov->iov_base += copy;
@@ -75,7 +81,8 @@
 }
 
 static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
+pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
+                      int atomic)
 {
         unsigned long copy;
 
@@ -84,8 +91,13 @@
                         iov++;
                 copy = min_t(unsigned long, len, iov->iov_len);
 
-                if (copy_to_user(iov->iov_base, from, copy))
-                        return -EFAULT;
+                if (atomic) {
+                        if (__copy_to_user_inatomic(iov->iov_base, from, copy))
+                                return -EFAULT;
+                } else {
+                        if (copy_to_user(iov->iov_base, from, copy))
+                                return -EFAULT;
+                }
                 from += copy;
                 len -= copy;
                 iov->iov_base += copy;
@@ -94,6 +106,47 @@
         return 0;
 }
 
+/*
+ * Attempt to pre-fault in the user memory, so we can use atomic copies.
+ * Returns the number of bytes not faulted in.
+ */
+static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
+{
+        while (!iov->iov_len)
+                iov++;
+
+        while (len > 0) {
+                unsigned long this_len;
+
+                this_len = min_t(unsigned long, len, iov->iov_len);
+                if (fault_in_pages_writeable(iov->iov_base, this_len))
+                        break;
+
+                len -= this_len;
+                iov++;
+        }
+
+        return len;
+}
+
+/*
+ * Pre-fault in the user memory, so we can use atomic copies.
+ */
+static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
+{
+        while (!iov->iov_len)
+                iov++;
+
+        while (len > 0) {
+                unsigned long this_len;
+
+                this_len = min_t(unsigned long, len, iov->iov_len);
+                fault_in_pages_readable(iov->iov_base, this_len);
+                len -= this_len;
+                iov++;
+        }
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
 {
@@ -111,15 +164,24 @@
 }
 
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
-                           struct pipe_buffer *buf)
+                           struct pipe_buffer *buf, int atomic)
 {
+        if (atomic) {
+                buf->flags |= PIPE_BUF_FLAG_ATOMIC;
+                return kmap_atomic(buf->page, KM_USER0);
+        }
+
         return kmap(buf->page);
 }
 
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
-                            struct pipe_buffer *buf)
+                            struct pipe_buffer *buf, void *map_data)
 {
-        kunmap(buf->page);
+        if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
+                buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+                kunmap_atomic(map_data, KM_USER0);
+        } else
+                kunmap(buf->page);
 }
 
 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -183,7 +245,7 @@
                         struct pipe_buf_operations *ops = buf->ops;
                         void *addr;
                         size_t chars = buf->len;
-                        int error;
+                        int error, atomic;
 
                         if (chars > total_len)
                                 chars = total_len;
@@ -195,12 +257,21 @@
                                 break;
                         }
 
-                        addr = ops->map(pipe, buf);
-                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
-                        ops->unmap(pipe, buf);
+                        atomic = !iov_fault_in_pages_write(iov, chars);
+redo:
+                        addr = ops->map(pipe, buf, atomic);
+                        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
+                        ops->unmap(pipe, buf, addr);
                         if (unlikely(error)) {
+                                /*
+                                 * Just retry with the slow path if we failed.
+                                 */
+                                if (atomic) {
+                                        atomic = 0;
+                                        goto redo;
+                                }
                                 if (!ret)
-                                        ret = -EFAULT;
+                                        ret = error;
                                 break;
                         }
                         ret += chars;
@@ -304,21 +375,28 @@
                 int offset = buf->offset + buf->len;
 
                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
+                        int error, atomic = 1;
                         void *addr;
-                        int error;
 
                         error = ops->pin(pipe, buf);
                         if (error)
                                 goto out;
 
-                        addr = ops->map(pipe, buf);
+                        iov_fault_in_pages_read(iov, chars);
+redo1:
+                        addr = ops->map(pipe, buf, atomic);
                         error = pipe_iov_copy_from_user(offset + addr, iov,
-                                                        chars);
-                        ops->unmap(pipe, buf);
+                                                        chars, atomic);
+                        ops->unmap(pipe, buf, addr);
                         ret = error;
                         do_wakeup = 1;
-                        if (error)
+                        if (error) {
+                                if (atomic) {
+                                        atomic = 0;
+                                        goto redo1;
+                                }
                                 goto out;
+                        }
                         buf->len += chars;
                         total_len -= chars;
                         ret = chars;
@@ -341,7 +419,8 @@
                         int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
                         struct pipe_buffer *buf = pipe->bufs + newbuf;
                         struct page *page = pipe->tmp_page;
-                        int error;
+                        char *src;
+                        int error, atomic = 1;
 
                         if (!page) {
                                 page = alloc_page(GFP_HIGHUSER);
@@ -361,11 +440,27 @@
                         if (chars > total_len)
                                 chars = total_len;
 
-                        error = pipe_iov_copy_from_user(kmap(page), iov, chars);
-                        kunmap(page);
+                        iov_fault_in_pages_read(iov, chars);
+redo2:
+                        if (atomic)
+                                src = kmap_atomic(page, KM_USER0);
+                        else
+                                src = kmap(page);
+
+                        error = pipe_iov_copy_from_user(src, iov, chars,
+                                                        atomic);
+                        if (atomic)
+                                kunmap_atomic(src, KM_USER0);
+                        else
+                                kunmap(page);
+
                         if (unlikely(error)) {
+                                if (atomic) {
+                                        atomic = 0;
+                                        goto redo2;
+                                }
                                 if (!ret)
-                                        ret = -EFAULT;
+                                        ret = error;
                                 break;
                         }
                         ret += chars;
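The interface change above is worth spelling out: ->map() now takes an atomic flag, and ->unmap() takes back the mapping cookie, because kunmap_atomic() wants the mapped virtual address while kunmap() wants the page; PIPE_BUF_FLAG_ATOMIC records which path was taken. A caller therefore follows this retry shape (a condensed sketch of the pipe_readv() hunk above, with variable setup elided):

        void *addr;
        int error, atomic;

        /* Only try the atomic path if the user pages are faulted in. */
        atomic = !iov_fault_in_pages_write(iov, chars);
redo:
        addr = ops->map(pipe, buf, atomic);
        error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
        ops->unmap(pipe, buf, addr);    /* hand the mapping cookie back */
        if (unlikely(error) && atomic) {
                atomic = 0;             /* retry with the sleeping path */
                goto redo;
        }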
fs/splice.c
@@ -640,13 +640,13 @@
                 /*
                  * Careful, ->map() uses KM_USER0!
                  */
-                char *src = buf->ops->map(info, buf);
+                char *src = buf->ops->map(info, buf, 1);
                 char *dst = kmap_atomic(page, KM_USER1);
 
                 memcpy(dst + offset, src + buf->offset, this_len);
                 flush_dcache_page(page);
                 kunmap_atomic(dst, KM_USER1);
-                buf->ops->unmap(info, buf);
+                buf->ops->unmap(info, buf, src);
         }
 
         ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
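This caller can pass the atomic flag as an unconditional 1: the copy here is a kernel-to-kernel memcpy() rather than a user copy, so it cannot fault and never needs the slow-path retry. The destination page is concurrently mapped via KM_USER1, which is exactly why ->map() uses KM_USER0, as the comment warns. The two atomic slots nest like this (restating the hunk above as plain code):

        char *src = buf->ops->map(info, buf, 1);        /* KM_USER0, atomic */
        char *dst = kmap_atomic(page, KM_USER1);

        memcpy(dst + offset, src + buf->offset, this_len);
        flush_dcache_page(page);
        kunmap_atomic(dst, KM_USER1);
        buf->ops->unmap(info, buf, src);                /* releases KM_USER0 */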
include/linux/pipe_fs_i.h
@@ -5,7 +5,8 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_LRU       0x01
+#define PIPE_BUF_FLAG_LRU       0x01    /* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC    0x02    /* was atomically mapped */
 
 struct pipe_buffer {
         struct page *page;
@@ -28,8 +29,8 @@
  */
 struct pipe_buf_operations {
         int can_merge;
-        void * (*map)(struct pipe_inode_info *, struct pipe_buffer *);
-        void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
+        void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
+        void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
         int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
         void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
         int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -64,8 +65,8 @@
 void __free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
-void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *);
-void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *);
+void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
+void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
 
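For completeness, here is how a pipe_buf_operations table looks after this signature change. The table name below is made up for illustration; fs/pipe.c's anon_pipe_buf_ops is wired up the same way, pointing at the updated generic helpers (the .get hook corresponds to generic_pipe_buf_get(), declared in the hunk above):

static struct pipe_buf_operations example_pipe_buf_ops = {
        .can_merge = 1,
        .map = generic_pipe_buf_map,            /* now takes the atomic flag */
        .unmap = generic_pipe_buf_unmap,        /* now takes the map cookie */
        .pin = generic_pipe_buf_pin,
        .release = anon_pipe_buf_release,
        .steal = anon_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};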