Commit f8af4da3b4c14e7267c4ffb952079af3912c51c5

Authored by Hugh Dickins
Committed by Linus Torvalds
1 parent d19f352484

ksm: the mm interface to ksm

This patch presents the mm interface to a dummy version of ksm.c, for
better scrutiny of that interface: the real ksm.c follows later.

When CONFIG_KSM is not set, madvise(2) rejects MADV_MERGEABLE and
MADV_UNMERGEABLE with EINVAL, since that seems more helpful than
pretending that they can be serviced.  But when CONFIG_KSM=y, accept them
even if KSM is not currently running, and even on areas which KSM will not
touch (e.g.  hugetlb or shared file or special driver mappings).

Like other madvices, report ENOMEM despite success if any area in the
range is unmapped, and use EAGAIN to report out of memory.

Define vma flag VM_MERGEABLE to identify an area on which KSM may try
merging pages: leave it to ksm_madvise() to decide whether to set it.
Define mm flag MMF_VM_MERGEABLE to identify an mm which might contain
VM_MERGEABLE areas, to minimize callouts when forking or exiting.

Based upon earlier patches by Chris Wright and Izik Eidus.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Chris Wright <chrisw@redhat.com>
Signed-off-by: Izik Eidus <ieidus@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 8 changed files with 147 additions and 1 deletions Side-by-side Diff

include/linux/ksm.h
  1 +#ifndef __LINUX_KSM_H
  2 +#define __LINUX_KSM_H
  3 +/*
  4 + * Memory merging support.
  5 + *
  6 + * This code enables dynamic sharing of identical pages found in different
  7 + * memory areas, even if they are not shared by fork().
  8 + */
  9 +
  10 +#include <linux/bitops.h>
  11 +#include <linux/mm.h>
  12 +#include <linux/sched.h>
  13 +
  14 +#ifdef CONFIG_KSM
  15 +int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  16 + unsigned long end, int advice, unsigned long *vm_flags);
  17 +int __ksm_enter(struct mm_struct *mm);
  18 +void __ksm_exit(struct mm_struct *mm);
  19 +
  20 +static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
  21 +{
  22 + if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
  23 + return __ksm_enter(mm);
  24 + return 0;
  25 +}
  26 +
  27 +static inline void ksm_exit(struct mm_struct *mm)
  28 +{
  29 + if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
  30 + __ksm_exit(mm);
  31 +}
  32 +#else /* !CONFIG_KSM */
  33 +
  34 +static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  35 + unsigned long end, int advice, unsigned long *vm_flags)
  36 +{
  37 + return 0;
  38 +}
  39 +
  40 +static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
  41 +{
  42 + return 0;
  43 +}
  44 +
  45 +static inline void ksm_exit(struct mm_struct *mm)
  46 +{
  47 +}
  48 +#endif /* !CONFIG_KSM */
  49 +
  50 +#endif
include/linux/mm.h
... ... @@ -103,6 +103,7 @@
103 103 #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
104 104 #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
105 105 #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
  106 +#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
106 107  
107 108 #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
108 109 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
include/linux/sched.h
... ... @@ -434,7 +434,9 @@
434 434 /* dumpable bits */
435 435 #define MMF_DUMPABLE 0 /* core dump is permitted */
436 436 #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
  437 +
437 438 #define MMF_DUMPABLE_BITS 2
  439 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
438 440  
439 441 /* coredump filter bits */
440 442 #define MMF_DUMP_ANON_PRIVATE 2
... ... @@ -444,6 +446,7 @@
444 446 #define MMF_DUMP_ELF_HEADERS 6
445 447 #define MMF_DUMP_HUGETLB_PRIVATE 7
446 448 #define MMF_DUMP_HUGETLB_SHARED 8
  449 +
447 450 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
448 451 #define MMF_DUMP_FILTER_BITS 7
449 452 #define MMF_DUMP_FILTER_MASK \
... ... @@ -457,6 +460,10 @@
457 460 #else
458 461 # define MMF_DUMP_MASK_DEFAULT_ELF 0
459 462 #endif
  463 + /* leave room for more dump flags */
  464 +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
  465 +
  466 +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
460 467  
461 468 struct sighand_struct {
462 469 atomic_t count;
kernel/fork.c
... ... @@ -49,6 +49,7 @@
49 49 #include <linux/ftrace.h>
50 50 #include <linux/profile.h>
51 51 #include <linux/rmap.h>
  52 +#include <linux/ksm.h>
52 53 #include <linux/acct.h>
53 54 #include <linux/tsacct_kern.h>
54 55 #include <linux/cn_proc.h>
... ... @@ -299,6 +300,9 @@
299 300 rb_link = &mm->mm_rb.rb_node;
300 301 rb_parent = NULL;
301 302 pprev = &mm->mmap;
  303 + retval = ksm_fork(mm, oldmm);
  304 + if (retval)
  305 + goto out;
302 306  
303 307 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
304 308 struct file *file;
... ... @@ -435,7 +439,8 @@
435 439 atomic_set(&mm->mm_count, 1);
436 440 init_rwsem(&mm->mmap_sem);
437 441 INIT_LIST_HEAD(&mm->mmlist);
438   - mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
  442 + mm->flags = (current->mm) ?
  443 + (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
439 444 mm->core_state = NULL;
440 445 mm->nr_ptes = 0;
441 446 set_mm_counter(mm, file_rss, 0);
... ... @@ -496,6 +501,7 @@
496 501  
497 502 if (atomic_dec_and_test(&mm->mm_users)) {
498 503 exit_aio(mm);
  504 + ksm_exit(mm);
499 505 exit_mmap(mm);
500 506 set_mm_exe_file(mm, NULL);
501 507 if (!list_empty(&mm->mmlist)) {
mm/Kconfig
... ... @@ -214,6 +214,17 @@
214 214 config MMU_NOTIFIER
215 215 bool
216 216  
  217 +config KSM
  218 + bool "Enable KSM for page merging"
  219 + depends on MMU
  220 + help
  221 + Enable Kernel Samepage Merging: KSM periodically scans those areas
  222 + of an application's address space that an app has advised may be
  223 + mergeable. When it finds pages of identical content, it replaces
  224 + the many instances by a single resident page with that content, so
  225 + saving memory until one or another app needs to modify the content.
  226 + Recommended for use with KVM, or with other duplicative applications.
  227 +
217 228 config DEFAULT_MMAP_MIN_ADDR
218 229 int "Low address space to protect from user allocation"
219 230 default 4096
mm/Makefile
... ... @@ -25,6 +25,7 @@
25 25 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26 26 obj-$(CONFIG_SLOB) += slob.o
27 27 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
  28 +obj-$(CONFIG_KSM) += ksm.o
28 29 obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
29 30 obj-$(CONFIG_SLAB) += slab.o
30 31 obj-$(CONFIG_SLUB) += slub.o
mm/ksm.c
  1 +/*
  2 + * Initial dummy version just to illustrate KSM's interface to other files.
  3 + */
  4 +
  5 +#include <linux/errno.h>
  6 +#include <linux/mman.h>
  7 +#include <linux/ksm.h>
  8 +
  9 +int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  10 + unsigned long end, int advice, unsigned long *vm_flags)
  11 +{
  12 + struct mm_struct *mm = vma->vm_mm;
  13 +
  14 + switch (advice) {
  15 + case MADV_MERGEABLE:
  16 + /*
  17 + * Be somewhat over-protective for now!
  18 + */
  19 + if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
  20 + VM_PFNMAP | VM_IO | VM_DONTEXPAND |
  21 + VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
  22 + VM_MIXEDMAP | VM_SAO))
  23 + return 0; /* just ignore the advice */
  24 +
  25 + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
  26 + if (__ksm_enter(mm) < 0)
  27 + return -EAGAIN;
  28 +
  29 + *vm_flags |= VM_MERGEABLE;
  30 + break;
  31 +
  32 + case MADV_UNMERGEABLE:
  33 + if (!(*vm_flags & VM_MERGEABLE))
  34 + return 0; /* just ignore the advice */
  35 +
  36 + /* Unmerge any merged pages here */
  37 +
  38 + *vm_flags &= ~VM_MERGEABLE;
  39 + break;
  40 + }
  41 +
  42 + return 0;
  43 +}
  44 +
  45 +int __ksm_enter(struct mm_struct *mm)
  46 +{
  47 + /* Allocate a structure to track mm and link it into KSM's list */
  48 + set_bit(MMF_VM_MERGEABLE, &mm->flags);
  49 + return 0;
  50 +}
  51 +
  52 +void __ksm_exit(struct mm_struct *mm)
  53 +{
  54 + /* Unlink and free all KSM's structures which track this mm */
  55 + clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  56 +}
mm/madvise.c
... ... @@ -11,6 +11,7 @@
11 11 #include <linux/mempolicy.h>
12 12 #include <linux/hugetlb.h>
13 13 #include <linux/sched.h>
  14 +#include <linux/ksm.h>
14 15  
15 16 /*
16 17 * Any behaviour which results in changes to the vma->vm_flags needs to
... ... @@ -63,6 +64,12 @@
63 64 }
64 65 new_flags &= ~VM_DONTCOPY;
65 66 break;
  67 + case MADV_MERGEABLE:
  68 + case MADV_UNMERGEABLE:
  69 + error = ksm_madvise(vma, start, end, behavior, &new_flags);
  70 + if (error)
  71 + goto out;
  72 + break;
66 73 }
67 74  
68 75 if (new_flags == vma->vm_flags) {
... ... @@ -239,6 +246,10 @@
239 246 case MADV_REMOVE:
240 247 case MADV_WILLNEED:
241 248 case MADV_DONTNEED:
  249 +#ifdef CONFIG_KSM
  250 + case MADV_MERGEABLE:
  251 + case MADV_UNMERGEABLE:
  252 +#endif
242 253 return 1;
243 254  
244 255 default:
... ... @@ -273,6 +284,9 @@
273 284 * MADV_DONTFORK - omit this area from child's address space when forking:
274 285 * typically, to avoid COWing pages pinned by get_user_pages().
275 286 * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
  287 + * MADV_MERGEABLE - the application recommends that KSM try to merge pages in
  288 + * this area with pages of identical content from other such areas.
  289 + * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
276 290 *
277 291 * return values:
278 292 * zero - success