Commit d2bf6be8ab63aa84e6149aac934649aadf3828b1
Committed by
Linus Torvalds
1 parent
7ffc59b4d0
Exists in
master
and in
4 other branches
mm: clean up get_user_pages_fast() documentation
Move more documentation for get_user_pages_fast into the new kerneldoc comment. Add some comments for get_user_pages as well. Also, move get_user_pages_fast declaration up to get_user_pages. It wasn't there initially because it was once a static inline function.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andy Grover <andy.grover@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 3 changed files with 67 additions and 19 deletions Side-by-side Diff
include/linux/mm.h
... | ... | @@ -824,8 +824,11 @@ |
824 | 824 | extern int make_pages_present(unsigned long addr, unsigned long end); |
825 | 825 | extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); |
826 | 826 | |
827 | -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, | |
828 | - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); | |
827 | +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |
828 | + unsigned long start, int len, int write, int force, | |
829 | + struct page **pages, struct vm_area_struct **vmas); | |
830 | +int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |
831 | + struct page **pages); | |
829 | 832 | |
830 | 833 | extern int try_to_release_page(struct page * page, gfp_t gfp_mask); |
831 | 834 | extern void do_invalidatepage(struct page *page, unsigned long offset); |
... | ... | @@ -848,19 +851,6 @@ |
848 | 851 | extern int mprotect_fixup(struct vm_area_struct *vma, |
849 | 852 | struct vm_area_struct **pprev, unsigned long start, |
850 | 853 | unsigned long end, unsigned long newflags); |
851 | - | |
852 | -/* | |
853 | - * get_user_pages_fast provides equivalent functionality to get_user_pages, | |
854 | - * operating on current and current->mm (force=0 and doesn't return any vmas). | |
855 | - * | |
856 | - * get_user_pages_fast may take mmap_sem and page tables, so no assumptions | |
857 | - * can be made about locking. get_user_pages_fast is to be implemented in a | |
858 | - * way that is advantageous (vs get_user_pages()) when the user memory area is | |
859 | - * already faulted in and present in ptes. However if the pages have to be | |
860 | - * faulted in, it may turn out to be slightly slower). | |
861 | - */ | |
862 | -int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |
863 | - struct page **pages); | |
864 | 854 | |
865 | 855 | /* |
866 | 856 | * A callback you can register to apply pressure to ageable caches. |
mm/memory.c
... | ... | @@ -1360,6 +1360,56 @@ |
1360 | 1360 | return i; |
1361 | 1361 | } |
1362 | 1362 | |
1363 | +/** | |
1364 | + * get_user_pages() - pin user pages in memory | |
1365 | + * @tsk: task_struct of target task | |
1366 | + * @mm: mm_struct of target mm | |
1367 | + * @start: starting user address | |
1368 | + * @len: number of pages from start to pin | |
1369 | + * @write: whether pages will be written to by the caller | |
1370 | + * @force: whether to force write access even if user mapping is | |
1371 | + * readonly. This will result in the page being COWed even | |
1372 | + * in MAP_SHARED mappings. You do not want this. | |
1373 | + * @pages: array that receives pointers to the pages pinned. | |
1374 | + * Should be at least len entries long. Or NULL, if caller | |
1375 | + * only intends to ensure the pages are faulted in. | |
1376 | + * @vmas: array of pointers to vmas corresponding to each page. | |
1377 | + * Or NULL if the caller does not require them. | |
1378 | + * | |
1379 | + * Returns number of pages pinned. This may be fewer than the number | |
1380 | + * requested. If len is 0 or negative, returns 0. If no pages | |
1381 | + * were pinned, returns -errno. Each page returned must be released | |
1382 | + * with a put_page() call when it is finished with. vmas will only | |
1383 | + * remain valid while mmap_sem is held. | |
1384 | + * | |
1385 | + * Must be called with mmap_sem held for read or write. | |
1386 | + * | |
1387 | + * get_user_pages walks a process's page tables and takes a reference to | |
1388 | + * each struct page that each user address corresponds to at a given | |
1389 | + * instant. That is, it takes the page that would be accessed if a user | |
1390 | + * thread accesses the given user virtual address at that instant. | |
1391 | + * | |
1392 | + * This does not guarantee that the page exists in the user mappings when | |
1393 | + * get_user_pages returns, and there may even be a completely different | |
1394 | + * page there in some cases (e.g. if mmapped pagecache has been invalidated | |
1395 | + * and subsequently refaulted). However it does guarantee that the page | |
1396 | + * won't be freed completely. And mostly callers simply care that the page | |
1397 | + * contains data that was valid *at some point in time*. Typically, an IO | |
1398 | + * or similar operation cannot guarantee anything stronger anyway because | |
1399 | + * locks can't be held over the syscall boundary. | |
1400 | + * | |
1401 | + * If write=0, the page must not be written to. If the page is written to, | |
1402 | + * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called | |
1403 | + * after the page is finished with, and before put_page is called. | |
1404 | + * | |
1405 | + * get_user_pages is typically used for fewer-copy IO operations, to get a | |
1406 | + * handle on the memory by some means other than accesses via the user virtual | |
1407 | + * addresses. The pages may be submitted for DMA to devices or accessed via | |
1408 | + * their kernel linear mapping (via the kmap APIs). Care should be taken to | |
1409 | + * use the correct cache flushing APIs. | |
1410 | + * | |
1411 | + * See also get_user_pages_fast, for performance critical applications. | |
1412 | + */ | |
1363 | 1413 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
1364 | 1414 | unsigned long start, int len, int write, int force, |
1365 | 1415 | struct page **pages, struct vm_area_struct **vmas) |
mm/util.c
... | ... | @@ -233,13 +233,21 @@ |
233 | 233 | * @pages: array that receives pointers to the pages pinned. |
234 | 234 | * Should be at least nr_pages long. |
235 | 235 | * |
236 | - * Attempt to pin user pages in memory without taking mm->mmap_sem. | |
237 | - * If not successful, it will fall back to taking the lock and | |
238 | - * calling get_user_pages(). | |
239 | - * | |
240 | 236 | * Returns number of pages pinned. This may be fewer than the number |
241 | 237 | * requested. If nr_pages is 0 or negative, returns 0. If no pages |
242 | 238 | * were pinned, returns -errno. |
239 | + * | |
240 | + * get_user_pages_fast provides equivalent functionality to get_user_pages, | |
241 | + * operating on current and current->mm, with force=0 and vmas=NULL. However | |
242 | + * unlike get_user_pages, it must be called without mmap_sem held. | |
243 | + * | |
244 | + * get_user_pages_fast may take mmap_sem and page table locks, so no | |
245 | + * assumptions can be made about lack of locking. get_user_pages_fast is to be | |
246 | + * implemented in a way that is advantageous (vs get_user_pages()) when the | |
247 | + * user memory area is already faulted in and present in ptes. However, if the | |
248 | + * pages have to be faulted in, it may turn out to be slightly slower, so | |
249 | + * callers need to carefully consider what to use. On many architectures, | |
250 | + * get_user_pages_fast simply falls back to get_user_pages. | |
243 | 251 | */ |
244 | 252 | int __attribute__((weak)) get_user_pages_fast(unsigned long start, |
245 | 253 | int nr_pages, int write, struct page **pages) |