mm/ksm.c
  /*
   * Memory merging support.
   *
   * This code enables dynamic sharing of identical pages found in different
   * memory areas, even if they are not shared by fork()
   *
   * Copyright (C) 2008-2009 Red Hat, Inc.
   * Authors:
   *	Izik Eidus
   *	Andrea Arcangeli
   *	Chris Wright
   *	Hugh Dickins
   *
   * This work is licensed under the terms of the GNU GPL, version 2.
   */
  
  #include <linux/errno.h>
  #include <linux/mm.h>
  #include <linux/fs.h>
  #include <linux/mman.h>
  #include <linux/sched.h>
  #include <linux/rwsem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/spinlock.h>
  #include <linux/jhash.h>
  #include <linux/delay.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>
  #include <linux/slab.h>
  #include <linux/rbtree.h>
  #include <linux/memory.h>
  #include <linux/mmu_notifier.h>
  #include <linux/swap.h>
  #include <linux/ksm.h>
  #include <linux/hashtable.h>
  #include <linux/freezer.h>
  #include <linux/oom.h>
  #include <linux/numa.h>

  #include <asm/tlbflush.h>
  #include "internal.h"

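  /*
   * NUMA(x) reads a NUMA-specific value (always 0 without CONFIG_NUMA) and
   * DO_NUMA(stmt) executes a NUMA-only statement (compiled away without
   * CONFIG_NUMA), so the code below need not scatter #ifdefs.
   */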
  #ifdef CONFIG_NUMA
  #define NUMA(x)		(x)
  #define DO_NUMA(x)	do { (x); } while (0)
  #else
  #define NUMA(x)		(0)
  #define DO_NUMA(x)	do { } while (0)
  #endif
  /*
   * A few notes about the KSM scanning process,
   * to make it easier to understand the data structures below:
   *
   * In order to reduce excessive scanning, KSM sorts the memory pages by their
   * contents into a data structure that holds pointers to the pages' locations.
   *
   * Since the contents of the pages may change at any moment, KSM cannot just
   * insert the pages into a normal sorted tree and expect it to find anything.
   * Therefore KSM uses two data structures - the stable and the unstable tree.
   *
   * The stable tree holds pointers to all the merged pages (ksm pages), sorted
   * by their contents.  Because each such page is write-protected, searching on
   * this tree is fully assured to be working (except when pages are unmapped),
   * and therefore this tree is called the stable tree.
   *
   * In addition to the stable tree, KSM uses a second data structure called the
   * unstable tree: this tree holds pointers to pages which have been found to
   * be "unchanged for a period of time".  The unstable tree sorts these pages
   * by their contents, but since they are not write-protected, KSM cannot rely
   * upon the unstable tree to work correctly - the unstable tree is liable to
   * be corrupted as its contents are modified, and so it is called unstable.
   *
   * KSM solves this problem by several techniques:
   *
   * 1) The unstable tree is flushed every time KSM completes scanning all
   *    memory areas, and then the tree is rebuilt again from the beginning.
   * 2) KSM will only insert into the unstable tree pages whose hash value
   *    has not changed since the previous scan of all memory areas.
   * 3) The unstable tree is a red-black tree - so its balancing is based on the
   *    colors of the nodes and not on their contents, assuring that even when
   *    the tree gets "corrupted" it won't get out of balance, so scanning time
   *    remains the same (also, searching and inserting nodes in an rbtree uses
   *    the same algorithm, so we have no overhead when we flush and rebuild).
   * 4) KSM never flushes the stable tree, which means that even if it were to
   *    take 10 attempts to find a page in the unstable tree, once it is found,
   *    it is secured in the stable tree.  (When we scan a new page, we first
   *    compare it against the stable tree, and then against the unstable tree.)
   *
   * If the merge_across_nodes tunable is unset, then KSM maintains multiple
   * stable trees and multiple unstable trees: one of each for each NUMA node.
   */
  
  /**
   * struct mm_slot - ksm information per mm that is being scanned
   * @link: link to the mm_slots hash list
   * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
   * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
   * @mm: the mm that this information is valid for
   */
  struct mm_slot {
  	struct hlist_node link;
  	struct list_head mm_list;
  	struct rmap_item *rmap_list;
  	struct mm_struct *mm;
  };
  
  /**
   * struct ksm_scan - cursor for scanning
   * @mm_slot: the current mm_slot we are scanning
   * @address: the next address inside that to be scanned
   * @rmap_list: link to the next rmap to be scanned in the rmap_list
   * @seqnr: count of completed full scans (needed when removing unstable node)
   *
   * There is only the one ksm_scan instance of this cursor structure.
   */
  struct ksm_scan {
  	struct mm_slot *mm_slot;
  	unsigned long address;
  	struct rmap_item **rmap_list;
  	unsigned long seqnr;
  };
  
  /**
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
   * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
   * @list: linked into migrate_nodes, pending placement in the proper node tree
   * @hlist: hlist head of rmap_items using this ksm page
   * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
   * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
   */
  struct stable_node {
  	union {
  		struct rb_node node;	/* when node of stable tree */
  		struct {		/* when listed for migration */
  			struct list_head *head;
  			struct list_head list;
  		};
  	};
  	struct hlist_head hlist;
  	unsigned long kpfn;
  #ifdef CONFIG_NUMA
  	int nid;
  #endif
  };
  
  /**
   * struct rmap_item - reverse mapping item for virtual addresses
   * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
   * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
   * @nid: NUMA node id of unstable tree in which linked (may not match page)
   * @mm: the memory structure this rmap_item is pointing into
   * @address: the virtual address this rmap_item tracks (+ flags in low bits)
   * @oldchecksum: previous checksum of the page at that virtual address
   * @node: rb node of this rmap_item in the unstable tree
   * @head: pointer to stable_node heading this list in the stable tree
   * @hlist: link into hlist of rmap_items hanging off that stable_node
   */
  struct rmap_item {
  	struct rmap_item *rmap_list;
  	union {
  		struct anon_vma *anon_vma;	/* when stable */
  #ifdef CONFIG_NUMA
  		int nid;		/* when node of unstable tree */
  #endif
  	};
  	struct mm_struct *mm;
  	unsigned long address;		/* + low bits used for flags below */
  	unsigned int oldchecksum;	/* when unstable */
  	union {
  		struct rb_node node;	/* when node of unstable tree */
  		struct {		/* when listed from stable tree */
  			struct stable_node *head;
  			struct hlist_node hlist;
  		};
  	};
  };
  
  #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
  #define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
  #define STABLE_FLAG	0x200	/* is listed from the stable tree */
  
  /* The stable and unstable tree heads */
  static struct rb_root one_stable_tree[1] = { RB_ROOT };
  static struct rb_root one_unstable_tree[1] = { RB_ROOT };
  static struct rb_root *root_stable_tree = one_stable_tree;
  static struct rb_root *root_unstable_tree = one_unstable_tree;

  /* Recently migrated nodes of stable tree, pending proper placement */
  static LIST_HEAD(migrate_nodes);
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
  
  static struct mm_slot ksm_mm_head = {
  	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
  };
  static struct ksm_scan ksm_scan = {
  	.mm_slot = &ksm_mm_head,
  };
  
  static struct kmem_cache *rmap_item_cache;
  static struct kmem_cache *stable_node_cache;
  static struct kmem_cache *mm_slot_cache;
  
  /* The number of nodes in the stable tree */
  static unsigned long ksm_pages_shared;

  /* The number of page slots additionally sharing those nodes */
  static unsigned long ksm_pages_sharing;

  /* The number of nodes in the unstable tree */
  static unsigned long ksm_pages_unshared;
  
  /* The number of rmap_items in use: to calculate pages_volatile */
  static unsigned long ksm_rmap_items;
  /* Number of pages ksmd should scan in one batch */
  static unsigned int ksm_thread_pages_to_scan = 100;
  
  /* Milliseconds ksmd should sleep between batches */
  static unsigned int ksm_thread_sleep_millisecs = 20;

  #ifdef CONFIG_NUMA
  /* Zeroed when merging across nodes is not allowed */
  static unsigned int ksm_merge_across_nodes = 1;
  static int ksm_nr_node_ids = 1;
  #else
  #define ksm_merge_across_nodes	1U
  #define ksm_nr_node_ids		1
  #endif

  #define KSM_RUN_STOP	0
  #define KSM_RUN_MERGE	1
  #define KSM_RUN_UNMERGE	2
  #define KSM_RUN_OFFLINE	4
  static unsigned long ksm_run = KSM_RUN_STOP;
  static void wait_while_offlining(void);
  
  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
  static DEFINE_MUTEX(ksm_thread_mutex);
  static DEFINE_SPINLOCK(ksm_mmlist_lock);
  
  #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
  		sizeof(struct __struct), __alignof__(struct __struct),\
  		(__flags), NULL)
  
  static int __init ksm_slab_init(void)
  {
  	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
  	if (!rmap_item_cache)
  		goto out;
  	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
  	if (!stable_node_cache)
  		goto out_free1;
  	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
  	if (!mm_slot_cache)
  		goto out_free2;
  
  	return 0;
  out_free2:
  	kmem_cache_destroy(stable_node_cache);
  out_free1:
  	kmem_cache_destroy(rmap_item_cache);
  out:
  	return -ENOMEM;
  }
  
  static void __init ksm_slab_free(void)
  {
  	kmem_cache_destroy(mm_slot_cache);
  	kmem_cache_destroy(stable_node_cache);
  	kmem_cache_destroy(rmap_item_cache);
  	mm_slot_cache = NULL;
  }
  
  static inline struct rmap_item *alloc_rmap_item(void)
  {
  	struct rmap_item *rmap_item;
  
  	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
  	if (rmap_item)
  		ksm_rmap_items++;
  	return rmap_item;
  }
  
  static inline void free_rmap_item(struct rmap_item *rmap_item)
  {
  	ksm_rmap_items--;
  	rmap_item->mm = NULL;	/* debug safety */
  	kmem_cache_free(rmap_item_cache, rmap_item);
  }
  static inline struct stable_node *alloc_stable_node(void)
  {
  	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
  }
  
  static inline void free_stable_node(struct stable_node *stable_node)
  {
  	kmem_cache_free(stable_node_cache, stable_node);
  }
  static inline struct mm_slot *alloc_mm_slot(void)
  {
  	if (!mm_slot_cache)	/* initialization failed */
  		return NULL;
  	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
  }
  
  static inline void free_mm_slot(struct mm_slot *mm_slot)
  {
  	kmem_cache_free(mm_slot_cache, mm_slot);
  }
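  /*
   * Look up the mm_slot tracking mm in mm_slots_hash; returns NULL if this
   * mm has not been registered with KSM.
   */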
  static struct mm_slot *get_mm_slot(struct mm_struct *mm)
  {
  	struct mm_slot *slot;
  	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
  		if (slot->mm == mm)
  			return slot;

  	return NULL;
  }
  
  static void insert_to_mm_slots_hash(struct mm_struct *mm,
  				    struct mm_slot *mm_slot)
  {
  	mm_slot->mm = mm;
  	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
  }
  /*
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
   * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
   * a special flag: they can just back out as soon as mm_users goes to zero.
   * ksm_test_exit() is used throughout to make this test for exit: in some
   * places for correctness, in some places just to avoid unnecessary work.
   */
  static inline bool ksm_test_exit(struct mm_struct *mm)
  {
  	return atomic_read(&mm->mm_users) == 0;
  }
  
  /*
   * We use break_ksm to break COW on a ksm page: it's a stripped down
   *
   *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
   *		put_page(page);
   *
   * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
   * in case the application has unmapped and remapped mm,addr meanwhile.
   * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
   * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
   */
  static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
  {
  	struct page *page;
  	int ret = 0;
  
  	do {
  		cond_resched();
  		page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION);
  		if (IS_ERR_OR_NULL(page))
  			break;
  		if (PageKsm(page))
  			ret = handle_mm_fault(vma->vm_mm, vma, addr,
  							FAULT_FLAG_WRITE);
  		else
  			ret = VM_FAULT_WRITE;
  		put_page(page);
  	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
  	/*
  	 * We must loop because handle_mm_fault() may back out if there's
  	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
  	 *
  	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
  	 * COW has been broken, even if the vma does not permit VM_WRITE;
  	 * but note that a concurrent fault might break PageKsm for us.
  	 *
  	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
  	 * backing file, which also invalidates anonymous pages: that's
  	 * okay, that truncation will have unmapped the PageKsm for us.
  	 *
  	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
  	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
  	 * current task has TIF_MEMDIE set, and will be OOM killed on return
  	 * to user; and ksmd, having no mm, would never be chosen for that.
  	 *
  	 * But if the mm is in a limited mem_cgroup, then the fault may fail
  	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
  	 * even ksmd can fail in this way - though it's usually breaking ksm
  	 * just to undo a merge it made a moment before, so unlikely to oom.
  	 *
  	 * That's a pity: we might therefore have more kernel pages allocated
  	 * than we're counting as nodes in the stable tree; but ksm_do_scan
  	 * will retry to break_cow on each pass, so should recover the page
  	 * in due course.  The important thing is to not let VM_MERGEABLE
  	 * be cleared while any such pages might remain in the area.
  	 */
  	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
  }
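  /*
   * Return the VM_MERGEABLE vma covering addr in mm, or NULL if the mm is
   * exiting or no suitable anonymous vma is mapped there any more.
   */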
  static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
  		unsigned long addr)
  {
  	struct vm_area_struct *vma;
  	if (ksm_test_exit(mm))
  		return NULL;
  	vma = find_vma(mm, addr);
  	if (!vma || vma->vm_start > addr)
  		return NULL;
  	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  		return NULL;
  	return vma;
  }
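  /*
   * break_cow: a merge is being undone (or went only half-way), so drop the
   * stable anon_vma reference and fault in a fresh private copy of the page
   * at rmap_item's mm,address.
   */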
  static void break_cow(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	/*
  	 * It is not an accident that whenever we want to break COW
  	 * to undo, we also need to drop a reference to the anon_vma.
  	 */
  	put_anon_vma(rmap_item->anon_vma);

  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, addr);
  	if (vma)
  		break_ksm(vma, addr);
  	up_read(&mm->mmap_sem);
  }
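  /*
   * If page belongs to an anonymous transparent hugepage, return its head
   * page; otherwise return NULL.
   */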
  static struct page *page_trans_compound_anon(struct page *page)
  {
  	if (PageTransCompound(page)) {
  		struct page *head = compound_head(page);
  		/*
  		 * head may actually be split and freed from under
  		 * us but it's ok here.
  		 */
  		if (PageAnon(head))
  			return head;
  	}
  	return NULL;
  }
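  /*
   * get_mergeable_page: return the anonymous page currently mapped at
   * rmap_item's mm,address, with a reference held, or NULL if it has gone
   * or is no longer suitable for merging.
   */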
  static struct page *get_mergeable_page(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	struct page *page;
  
  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, addr);
  	if (!vma)
  		goto out;
  
  	page = follow_page(vma, addr, FOLL_GET);
  	if (IS_ERR_OR_NULL(page))
  		goto out;
  	if (PageAnon(page) || page_trans_compound_anon(page)) {
  		flush_anon_page(vma, page, addr);
  		flush_dcache_page(page);
  	} else {
  		put_page(page);
  out:		page = NULL;
  	}
  	up_read(&mm->mmap_sem);
  	return page;
  }
  /*
   * This helper is used for getting right index into array of tree roots.
   * When merge_across_nodes knob is set to 1, there are only two rb-trees for
   * stable and unstable pages from all nodes with roots in index 0. Otherwise,
   * every node has its own stable and unstable tree.
   */
  static inline int get_kpfn_nid(unsigned long kpfn)
  {
  	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
  }
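  /*
   * The ksm page backing this stable node is gone: drop each rmap_item's
   * anon_vma reference and stable flag (adjusting the pages_shared and
   * pages_sharing counts), unlink the node from its rbtree or from the
   * migrate_nodes list, and free it.
   */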
  static void remove_node_from_stable_tree(struct stable_node *stable_node)
  {
  	struct rmap_item *rmap_item;

  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
  		if (rmap_item->hlist.next)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
  		put_anon_vma(rmap_item->anon_vma);
  		rmap_item->address &= PAGE_MASK;
  		cond_resched();
  	}
  	if (stable_node->head == &migrate_nodes)
  		list_del(&stable_node->list);
  	else
  		rb_erase(&stable_node->node,
  			 root_stable_tree + NUMA(stable_node->nid));
  	free_stable_node(stable_node);
  }
  
  /*
   * get_ksm_page: checks if the page indicated by the stable node
   * is still its ksm page, despite having held no reference to it.
   * In which case we can trust the content of the page, and it
   * returns the gotten page; but if the page has now been zapped,
   * remove the stale node from the stable tree and return NULL.
   * But beware, the stable node's page might be being migrated.
   *
   * You would expect the stable_node to hold a reference to the ksm page.
   * But if it increments the page's count, swapping out has to wait for
   * ksmd to come around again before it can free the page, which may take
   * seconds or even minutes: much too unresponsive.  So instead we use a
   * "keyhole reference": access to the ksm page from the stable node peeps
   * out through its keyhole to see if that page still holds the right key,
   * pointing back to this stable node.  This relies on freeing a PageAnon
   * page to reset its page->mapping to NULL, and relies on no other use of
   * a page to put something that might look like our key in page->mapping.
   * is on its way to being freed; but it is an anomaly to bear in mind.
   */
  static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
  {
  	struct page *page;
  	void *expected_mapping;
  	unsigned long kpfn;

  	expected_mapping = (void *)stable_node +
  				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
  again:
  	kpfn = READ_ONCE(stable_node->kpfn);
  	page = pfn_to_page(kpfn);
  
  	/*
  	 * page is computed from kpfn, so on most architectures reading
  	 * page->mapping is naturally ordered after reading node->kpfn,
  	 * but on Alpha we need to be more careful.
  	 */
  	smp_read_barrier_depends();
  	if (READ_ONCE(page->mapping) != expected_mapping)
  		goto stale;
  
  	/*
  	 * We cannot do anything with the page while its refcount is 0.
  	 * Usually 0 means free, or tail of a higher-order page: in which
  	 * case this node is no longer referenced, and should be freed;
  	 * however, it might mean that the page is under page_freeze_refs().
  	 * The __remove_mapping() case is easy, again the node is now stale;
  	 * but if page is swapcache in migrate_page_move_mapping(), it might
  	 * still be our page, in which case it's essential to keep the node.
  	 */
  	while (!get_page_unless_zero(page)) {
  		/*
  		 * Another check for page->mapping != expected_mapping would
  		 * work here too.  We have chosen the !PageSwapCache test to
  		 * optimize the common case, when the page is or is about to
  		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
  		 * in the freeze_refs section of __remove_mapping(); but Anon
  		 * page->mapping reset to NULL later, in free_pages_prepare().
  		 */
  		if (!PageSwapCache(page))
  			goto stale;
  		cpu_relax();
  	}
  	if (READ_ONCE(page->mapping) != expected_mapping) {
  		put_page(page);
  		goto stale;
  	}

  	if (lock_it) {
  		lock_page(page);
  		if (READ_ONCE(page->mapping) != expected_mapping) {
  			unlock_page(page);
  			put_page(page);
  			goto stale;
  		}
  	}
  	return page;

  stale:
  	/*
  	 * We come here from above when page->mapping or !PageSwapCache
  	 * suggests that the node is stale; but it might be under migration.
  	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
  	 * before checking whether node->kpfn has been changed.
  	 */
  	smp_rmb();
  	if (READ_ONCE(stable_node->kpfn) != kpfn)
  		goto again;
  	remove_node_from_stable_tree(stable_node);
  	return NULL;
  }
  /*
   * Removing rmap_item from stable or unstable tree.
   * This function will clean the information from the stable/unstable tree.
   */
  static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
  {
  	if (rmap_item->address & STABLE_FLAG) {
  		struct stable_node *stable_node;
  		struct page *page;

  		stable_node = rmap_item->head;
  		page = get_ksm_page(stable_node, true);
  		if (!page)
  			goto out;

  		hlist_del(&rmap_item->hlist);
  		unlock_page(page);
  		put_page(page);

  		if (stable_node->hlist.first)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;

  		put_anon_vma(rmap_item->anon_vma);
  		rmap_item->address &= PAGE_MASK;

  	} else if (rmap_item->address & UNSTABLE_FLAG) {
  		unsigned char age;
  		/*
  		 * Usually ksmd can and must skip the rb_erase, because
  		 * root_unstable_tree was already reset to RB_ROOT.
  		 * But be careful when an mm is exiting: do the rb_erase
  		 * if this rmap_item was inserted by this scan, rather
  		 * than left over from before.
  		 */
  		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
  		BUG_ON(age > 1);
  		if (!age)
  			rb_erase(&rmap_item->node,
  				 root_unstable_tree + NUMA(rmap_item->nid));
  		ksm_pages_unshared--;
  		rmap_item->address &= PAGE_MASK;
  	}
  out:
  	cond_resched();		/* we're called from many long loops */
  }
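  /*
   * Unlink from their trees and free all rmap_items from *rmap_list onwards:
   * used once a scan or an unmerge has passed the addresses they tracked.
   */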
  static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
  				       struct rmap_item **rmap_list)
  {
  	while (*rmap_list) {
  		struct rmap_item *rmap_item = *rmap_list;
  		*rmap_list = rmap_item->rmap_list;
  		remove_rmap_item_from_tree(rmap_item);
  		free_rmap_item(rmap_item);
  	}
  }
  
  /*
   * Though it's very tempting to unmerge rmap_items from stable tree rather
   * than check every pte of a given vma, the locking doesn't quite work for
   * that - an rmap_item is assigned to the stable tree after inserting ksm
   * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
   * rmap_items from parent to child at fork time (so as not to waste time
   * if exit comes before the next scan reaches it).
   *
   * Similarly, although we'd like to remove rmap_items (so updating counts
   * and freeing memory) when unmerging an area, it's easier to leave that
   * to the next pass of ksmd - consider, for example, how ksmd might be
   * in cmp_and_merge_page on one of the rmap_items we would be removing.
   */
  static int unmerge_ksm_pages(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	unsigned long addr;
  	int err = 0;

  	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
  		if (ksm_test_exit(vma->vm_mm))
  			break;
  		if (signal_pending(current))
  			err = -ERESTARTSYS;
  		else
  			err = break_ksm(vma, addr);
  	}
  	return err;
  }
  #ifdef CONFIG_SYSFS
  /*
   * Only called through the sysfs control interface:
   */
  static int remove_stable_node(struct stable_node *stable_node)
  {
  	struct page *page;
  	int err;
  
  	page = get_ksm_page(stable_node, true);
  	if (!page) {
  		/*
  		 * get_ksm_page did remove_node_from_stable_tree itself.
  		 */
  		return 0;
  	}
  	if (WARN_ON_ONCE(page_mapped(page))) {
  		/*
  		 * This should not happen: but if it does, just refuse to let
  		 * merge_across_nodes be switched - there is no need to panic.
  		 */
  		err = -EBUSY;
  	} else {
  		/*
  		 * The stable node did not yet appear stale to get_ksm_page(),
  		 * since that allows for an unmapped ksm page to be recognized
  		 * right up until it is freed; but the node is safe to remove.
  		 * This page might be in a pagevec waiting to be freed,
  		 * or it might be PageSwapCache (perhaps under writeback),
  		 * or it might have been removed from swapcache a moment ago.
  		 */
  		set_page_stable_node(page, NULL);
  		remove_node_from_stable_tree(stable_node);
  		err = 0;
  	}
  
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
  
  static int remove_all_stable_nodes(void)
  {
  	struct stable_node *stable_node;
  	struct list_head *this, *next;
  	int nid;
  	int err = 0;
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		while (root_stable_tree[nid].rb_node) {
  			stable_node = rb_entry(root_stable_tree[nid].rb_node,
  						struct stable_node, node);
  			if (remove_stable_node(stable_node)) {
  				err = -EBUSY;
  				break;	/* proceed to next nid */
  			}
  			cond_resched();
  		}
  	}
  	list_for_each_safe(this, next, &migrate_nodes) {
  		stable_node = list_entry(this, struct stable_node, list);
  		if (remove_stable_node(stable_node))
  			err = -EBUSY;
  		cond_resched();
  	}
  	return err;
  }
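  /*
   * The KSM_RUN_UNMERGE case of the sysfs "run" interface: break COW on
   * every merged page in every registered mm, discard all rmap_items, then
   * clean out the stable trees.
   */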
  static int unmerge_and_remove_all_rmap_items(void)
  {
  	struct mm_slot *mm_slot;
  	struct mm_struct *mm;
  	struct vm_area_struct *vma;
  	int err = 0;
  
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
  						struct mm_slot, mm_list);
  	spin_unlock(&ksm_mmlist_lock);

  	for (mm_slot = ksm_scan.mm_slot;
  			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
  		mm = mm_slot->mm;
  		down_read(&mm->mmap_sem);
  		for (vma = mm->mmap; vma; vma = vma->vm_next) {
  			if (ksm_test_exit(mm))
  				break;
  			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  				continue;
  			err = unmerge_ksm_pages(vma,
  						vma->vm_start, vma->vm_end);
  			if (err)
  				goto error;
  		}

  		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);

  		spin_lock(&ksm_mmlist_lock);
  		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
  						struct mm_slot, mm_list);
  		if (ksm_test_exit(mm)) {
  			hash_del(&mm_slot->link);
  			list_del(&mm_slot->mm_list);
  			spin_unlock(&ksm_mmlist_lock);
  
  			free_mm_slot(mm_slot);
  			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  			up_read(&mm->mmap_sem);
  			mmdrop(mm);
  		} else {
  			spin_unlock(&ksm_mmlist_lock);
  			up_read(&mm->mmap_sem);
  		}
  	}
  	/* Clean up stable nodes, but don't worry if some are still busy */
  	remove_all_stable_nodes();
  	ksm_scan.seqnr = 0;
  	return 0;
  
  error:
  	up_read(&mm->mmap_sem);
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = &ksm_mm_head;
  	spin_unlock(&ksm_mmlist_lock);
  	return err;
  }
  #endif /* CONFIG_SYSFS */

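  /*
   * Hash the page contents with jhash2: ksmd compares this checksum between
   * scans and only considers a page for the unstable tree once the checksum
   * has stayed the same.
   */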
  static u32 calc_checksum(struct page *page)
  {
  	u32 checksum;
  	void *addr = kmap_atomic(page);
  	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
  	kunmap_atomic(addr);
  	return checksum;
  }
  
  static int memcmp_pages(struct page *page1, struct page *page2)
  {
  	char *addr1, *addr2;
  	int ret;
  	addr1 = kmap_atomic(page1);
  	addr2 = kmap_atomic(page2);
  	ret = memcmp(addr1, addr2, PAGE_SIZE);
  	kunmap_atomic(addr2);
  	kunmap_atomic(addr1);
  	return ret;
  }
  
  static inline int pages_identical(struct page *page1, struct page *page2)
  {
  	return !memcmp_pages(page1, page2);
  }
  
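  /*
   * write_protect_page: make sure the pte mapping page in vma is clean and
   * write-protected, so the contents cannot change while KSM compares and
   * merges; the resulting pte value is saved in *orig_pte for replace_page()
   * to recheck.  Returns 0 on success, -EFAULT if the page is no longer
   * mapped there or other references (e.g. O_DIRECT) are in flight.
   */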
  static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  			      pte_t *orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long addr;
  	pte_t *ptep;
  	spinlock_t *ptl;
  	int swapped;
  	int err = -EFAULT;
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */
  
  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		goto out;
  	BUG_ON(PageTransCompound(page));
  
  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  	ptep = page_check_address(page, mm, addr, &ptl, 0);
  	if (!ptep)
  		goto out_mn;

  	if (pte_write(*ptep) || pte_dirty(*ptep)) {
  		pte_t entry;
  
  		swapped = PageSwapCache(page);
  		flush_cache_page(vma, addr, page_to_pfn(page));
  		/*
  		 * Ok this is tricky, when get_user_pages_fast() runs it doesn't
  		 * take any lock, therefore the check that we are going to make
  		 * with the pagecount against the mapcount is racy and
  		 * O_DIRECT can happen right after the check.
  		 * So we clear the pte and flush the tlb before the check
  		 * this assures us that no O_DIRECT can happen after the check
  		 * or in the middle of the check.
  		 */
  		entry = ptep_clear_flush_notify(vma, addr, ptep);
  		/*
  		 * Check that no O_DIRECT or similar I/O is in progress on the
  		 * page
  		 */
  		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
  			set_pte_at(mm, addr, ptep, entry);
  			goto out_unlock;
  		}
  		if (pte_dirty(entry))
  			set_page_dirty(page);
  		entry = pte_mkclean(pte_wrprotect(entry));
  		set_pte_at_notify(mm, addr, ptep, entry);
  	}
  	*orig_pte = *ptep;
  	err = 0;
  
  out_unlock:
  	pte_unmap_unlock(ptep, ptl);
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  out:
  	return err;
  }
  
  /**
   * replace_page - replace page in vma by new ksm page
   * @vma:      vma that holds the pte pointing to page
   * @page:     the page we are replacing by kpage
   * @kpage:    the ksm page we replace page by
   * @orig_pte: the original value of the pte
   *
   * Returns 0 on success, -EFAULT on failure.
   */
  static int replace_page(struct vm_area_struct *vma, struct page *page,
  			struct page *kpage, pte_t orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pmd_t *pmd;
  	pte_t *ptep;
  	spinlock_t *ptl;
  	unsigned long addr;
  	int err = -EFAULT;
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */

  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		goto out;
  	pmd = mm_find_pmd(mm, addr);
  	if (!pmd)
  		goto out;

  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
  	if (!pte_same(*ptep, orig_pte)) {
  		pte_unmap_unlock(ptep, ptl);
  		goto out_mn;
  	}
  	get_page(kpage);
  	page_add_anon_rmap(kpage, vma, addr);

  	flush_cache_page(vma, addr, pte_pfn(*ptep));
  	ptep_clear_flush_notify(vma, addr, ptep);
  	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

  	page_remove_rmap(page);
  	if (!page_mapped(page))
  		try_to_free_swap(page);
  	put_page(page);
  
  	pte_unmap_unlock(ptep, ptl);
  	err = 0;
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  out:
  	return err;
  }
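  /*
   * KSM merges only individual small pages: if page sits inside an anonymous
   * transparent hugepage, try to split that hugepage first.  Returns 0 when
   * the page is ready to be handled, non-zero if the caller should retry on
   * a later pass.
   */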
  static int page_trans_compound_anon_split(struct page *page)
  {
  	int ret = 0;
  	struct page *transhuge_head = page_trans_compound_anon(page);
  	if (transhuge_head) {
  		/* Get the reference on the head to split it. */
  		if (get_page_unless_zero(transhuge_head)) {
  			/*
  			 * Recheck we got the reference while the head
  			 * was still anonymous.
  			 */
  			if (PageAnon(transhuge_head))
  				ret = split_huge_page(transhuge_head);
  			else
  				/*
  				 * Retry later if split_huge_page ran
  				 * from under us.
  				 */
  				ret = 1;
  			put_page(transhuge_head);
  		} else
  			/* Retry later if split_huge_page ran from under us. */
  			ret = 1;
  	}
  	return ret;
  }
  /*
   * try_to_merge_one_page - take two pages and merge them into one
   * @vma: the vma that holds the pte pointing to page
   * @page: the PageAnon page that we want to replace with kpage
   * @kpage: the PageKsm page that we want to map instead of page,
   *         or NULL the first time when we want to use page as kpage.
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_one_page(struct vm_area_struct *vma,
  				 struct page *page, struct page *kpage)
  {
  	pte_t orig_pte = __pte(0);
  	int err = -EFAULT;
  	if (page == kpage)			/* ksm page forked */
  		return 0;
  	if (!(vma->vm_flags & VM_MERGEABLE))
  		goto out;
  	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
  		goto out;
  	BUG_ON(PageTransCompound(page));
  	if (!PageAnon(page))
  		goto out;
  	/*
  	 * We need the page lock to read a stable PageSwapCache in
  	 * write_protect_page().  We use trylock_page() instead of
  	 * lock_page() because we don't want to wait here - we
  	 * prefer to continue scanning and merging different pages,
  	 * then come back to this page when it is unlocked.
  	 */
  	if (!trylock_page(page))
  		goto out;
  	/*
  	 * If this anonymous page is mapped only here, its pte may need
  	 * to be write-protected.  If it's mapped elsewhere, all of its
  	 * ptes are necessarily already write-protected.  But in either
  	 * case, we need to lock and check page_count is not raised.
  	 */
  	if (write_protect_page(vma, page, &orig_pte) == 0) {
  		if (!kpage) {
  			/*
  			 * While we hold page lock, upgrade page from
  			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
  			 * stable_tree_insert() will update stable_node.
  			 */
  			set_page_stable_node(page, NULL);
  			mark_page_accessed(page);
  			err = 0;
  		} else if (pages_identical(page, kpage))
  			err = replace_page(vma, page, kpage, orig_pte);
  	}

  	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
  		munlock_vma_page(page);
  		if (!PageMlocked(kpage)) {
  			unlock_page(page);
  			lock_page(kpage);
  			mlock_vma_page(kpage);
  			page = kpage;		/* for final unlock */
  		}
  	}

  	unlock_page(page);
  out:
  	return err;
  }
  
  /*
   * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
   * but no new kernel page is allocated: kpage must already be a ksm page.
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
  				      struct page *page, struct page *kpage)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	struct vm_area_struct *vma;
  	int err = -EFAULT;
  	down_read(&mm->mmap_sem);
  	if (ksm_test_exit(mm))
  		goto out;
  	vma = find_vma(mm, rmap_item->address);
  	if (!vma || vma->vm_start > rmap_item->address)
  		goto out;
  	err = try_to_merge_one_page(vma, page, kpage);
  	if (err)
  		goto out;
  	/* Unstable nid is in union with stable anon_vma: remove first */
  	remove_rmap_item_from_tree(rmap_item);
  	/* Must get reference to anon_vma while still holding mmap_sem */
  	rmap_item->anon_vma = vma->anon_vma;
  	get_anon_vma(vma->anon_vma);
  out:
  	up_read(&mm->mmap_sem);
  	return err;
  }
  
  /*
   * try_to_merge_two_pages - take two identical pages and prepare them
   * to be merged into one page.
   *
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1073
1074
   * This function returns the kpage if we successfully merged two identical
   * pages into one ksm page, NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1075
   *
80e148226   Hugh Dickins   ksm: share anon p...
1076
   * Note that this function upgrades page to ksm page: if one of the pages
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1077
1078
   * is already a ksm page, try_to_merge_with_ksm_page should be used.
   */
  static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  					   struct page *page,
  					   struct rmap_item *tree_rmap_item,
  					   struct page *tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1083
  {
80e148226   Hugh Dickins   ksm: share anon p...
1084
  	int err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1085

80e148226   Hugh Dickins   ksm: share anon p...
1086
  	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1087
  	if (!err) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1088
  		err = try_to_merge_with_ksm_page(tree_rmap_item,
80e148226   Hugh Dickins   ksm: share anon p...
1089
  							tree_page, page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1090
  		/*
81464e306   Hugh Dickins   ksm: five little ...
1091
1092
  		 * If that fails, we have a ksm page with only one pte
  		 * pointing to it: so break it.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1093
  		 */
4035c07a8   Hugh Dickins   ksm: take keyhole...
1094
  		if (err)
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1095
  			break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1096
  	}
80e148226   Hugh Dickins   ksm: share anon p...
1097
  	return err ? NULL : page;
  }
  
  /*
   * stable_tree_search - search for page inside the stable tree
   *
   * This function checks if there is a page inside the stable tree
   * with identical content to the page that we are scanning right now.
   *
   * This function returns the stable tree node of identical content if found,
   * NULL otherwise.
   */
  static struct page *stable_tree_search(struct page *page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1110
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1111
  	int nid;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1112
  	struct rb_root *root;
  	struct rb_node **new;
  	struct rb_node *parent;
  	struct stable_node *stable_node;
  	struct stable_node *page_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1117

  	page_node = page_stable_node(page);
  	if (page_node && page_node->head != &migrate_nodes) {
  		/* ksm page forked */
08beca44d   Hugh Dickins   ksm: stable_node ...
1121
  		get_page(page);
62b61f611   Hugh Dickins   ksm: memory hotre...
1122
  		return page;
08beca44d   Hugh Dickins   ksm: stable_node ...
1123
  	}
90bd6fd31   Petr Holasek   ksm: allow trees ...
1124
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1125
  	root = root_stable_tree + nid;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1126
  again:
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1127
  	new = &root->rb_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1128
  	parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1129

4146d2d67   Hugh Dickins   ksm: make !merge_...
1130
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1131
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1132
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1133
  		cond_resched();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1134
  		stable_node = rb_entry(*new, struct stable_node, node);
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
1135
  		tree_page = get_ksm_page(stable_node, false);
4035c07a8   Hugh Dickins   ksm: take keyhole...
1136
1137
  		if (!tree_page)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1138

4035c07a8   Hugh Dickins   ksm: take keyhole...
1139
  		ret = memcmp_pages(page, tree_page);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1140
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1141

4146d2d67   Hugh Dickins   ksm: make !merge_...
1142
  		parent = *new;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1143
  		if (ret < 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1144
  			new = &parent->rb_left;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1145
  		else if (ret > 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1146
  			new = &parent->rb_right;
  		else {
  			/*
  			 * Lock and unlock the stable_node's page (which
  			 * might already have been migrated) so that page
  			 * migration is sure to notice its raised count.
  			 * It would be more elegant to return stable_node
  			 * than kpage, but that involves more changes.
  			 */
  			tree_page = get_ksm_page(stable_node, true);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1156
  			if (tree_page) {
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1157
  				unlock_page(tree_page);
  				if (get_kpfn_nid(stable_node->kpfn) !=
  						NUMA(stable_node->nid)) {
  					put_page(tree_page);
  					goto replace;
  				}
  				return tree_page;
  			}
  			/*
  			 * There is now a place for page_node, but the tree may
  			 * have been rebalanced, so re-evaluate parent and new.
  			 */
  			if (page_node)
  				goto again;
  			return NULL;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1172
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1173
  	}
  	if (!page_node)
  		return NULL;
  
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	rb_link_node(&page_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1180
  	rb_insert_color(&page_node->node, root);
  	get_page(page);
  	return page;
  
  replace:
  	if (page_node) {
  		list_del(&page_node->list);
  		DO_NUMA(page_node->nid = nid);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1188
  		rb_replace_node(&stable_node->node, &page_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1189
1190
  		get_page(page);
  	} else {
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1191
  		rb_erase(&stable_node->node, root);
  		page = NULL;
  	}
  	stable_node->head = &migrate_nodes;
  	list_add(&stable_node->list, stable_node->head);
  	return page;
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1200
   * stable_tree_insert - insert stable tree node pointing to new ksm page
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1201
1202
   * into the stable tree.
   *
   * This function returns the stable tree node just allocated on success,
   * NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1205
   */
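  /*
   * Explanatory note (not in the original source): the caller holds the page
   * lock on kpage and kpage has already been write-protected by the merge
   * above, so its contents stay stable while we search for the insertion
   * point in the stable tree.
   */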
  static struct stable_node *stable_tree_insert(struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1207
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1208
1209
  	int nid;
  	unsigned long kpfn;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1210
  	struct rb_root *root;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1211
  	struct rb_node **new;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1212
  	struct rb_node *parent = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1213
  	struct stable_node *stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1214

90bd6fd31   Petr Holasek   ksm: allow trees ...
1215
1216
  	kpfn = page_to_pfn(kpage);
  	nid = get_kpfn_nid(kpfn);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1217
1218
  	root = root_stable_tree + nid;
  	new = &root->rb_node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1219

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1220
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1221
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1222
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1223
  		cond_resched();
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1224
  		stable_node = rb_entry(*new, struct stable_node, node);
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
1225
  		tree_page = get_ksm_page(stable_node, false);
4035c07a8   Hugh Dickins   ksm: take keyhole...
1226
1227
  		if (!tree_page)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1228

4035c07a8   Hugh Dickins   ksm: take keyhole...
1229
1230
  		ret = memcmp_pages(kpage, tree_page);
  		put_page(tree_page);
  
  		parent = *new;
  		if (ret < 0)
  			new = &parent->rb_left;
  		else if (ret > 0)
  			new = &parent->rb_right;
  		else {
  			/*
  			 * It is not a bug that stable_tree_search() didn't
  			 * find this node: because at that time our page was
  			 * not yet write-protected, so may have changed since.
  			 */
  			return NULL;
  		}
  	}
  	stable_node = alloc_stable_node();
  	if (!stable_node)
  		return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1249

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1250
  	INIT_HLIST_HEAD(&stable_node->hlist);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1251
  	stable_node->kpfn = kpfn;
08beca44d   Hugh Dickins   ksm: stable_node ...
1252
  	set_page_stable_node(kpage, stable_node);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1253
  	DO_NUMA(stable_node->nid = nid);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1254
  	rb_link_node(&stable_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1255
  	rb_insert_color(&stable_node->node, root);
08beca44d   Hugh Dickins   ksm: stable_node ...
1256

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1257
  	return stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1258
1259
1260
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1261
1262
   * unstable_tree_search_insert - search for identical page,
   * else insert rmap_item into the unstable tree.
   *
   * This function searches for a page in the unstable tree identical to the
   * page currently being scanned; and if no identical page is found in the
   * tree, we insert rmap_item as a new object into the unstable tree.
   *
   * This function returns pointer to rmap_item found to be identical
   * to the currently scanned page, NULL otherwise.
   *
   * This function does both searching and inserting, because they share
   * the same walking algorithm in an rbtree.
   */
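  /*
   * Explanatory note (not in the original source): unlike the stable tree,
   * pages in the unstable tree are not write-protected, so a node may come
   * to describe stale contents.  The whole unstable tree is therefore thrown
   * away and rebuilt on every full scan (root_unstable_tree is reset in
   * scan_get_next_rmap_item()).
   */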
  static
  struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
  					      struct page *page,
  					      struct page **tree_pagep)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1278
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1279
1280
  	struct rb_node **new;
  	struct rb_root *root;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1281
  	struct rb_node *parent = NULL;
  	int nid;
  
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1285
  	root = root_unstable_tree + nid;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1286
  	new = &root->rb_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1287
1288
1289
  
  	while (*new) {
  		struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1290
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1291
  		int ret;
d178f27fc   Hugh Dickins   ksm: cond_resched...
1292
  		cond_resched();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1293
  		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1294
  		tree_page = get_mergeable_page(tree_rmap_item);
22eccdd7d   Dan Carpenter   ksm: check for ER...
1295
  		if (IS_ERR_OR_NULL(tree_page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1296
1297
1298
  			return NULL;
  
  		/*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1299
  		 * Don't substitute a ksm page for a forked page.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1300
  		 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1301
1302
  		if (page == tree_page) {
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1303
1304
  			return NULL;
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1305
  		ret = memcmp_pages(page, tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1306
1307
1308
  
  		parent = *new;
  		if (ret < 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1309
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1310
1311
  			new = &parent->rb_left;
  		} else if (ret > 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1312
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1313
  			new = &parent->rb_right;
  		} else if (!ksm_merge_across_nodes &&
  			   page_to_nid(tree_page) != nid) {
  			/*
  			 * If tree_page has been migrated to another NUMA node,
  			 * it will be flushed out and put in the right unstable
  			 * tree next time: only merge with it when across_nodes.
  			 */
  			put_page(tree_page);
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1323
  		} else {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1324
  			*tree_pagep = tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1325
1326
1327
  			return tree_rmap_item;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1328
  	rmap_item->address |= UNSTABLE_FLAG;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1329
  	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1330
  	DO_NUMA(rmap_item->nid = nid);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1331
  	rb_link_node(&rmap_item->node, parent, new);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1332
  	rb_insert_color(&rmap_item->node, root);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1333

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
1334
  	ksm_pages_unshared++;
  	return NULL;
  }
  
  /*
   * stable_tree_append - add another rmap_item to the linked list of
   * rmap_items hanging off a given node of the stable tree, all sharing
   * the same ksm page.
   */
  static void stable_tree_append(struct rmap_item *rmap_item,
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1344
  			       struct stable_node *stable_node)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1345
  {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1346
  	rmap_item->head = stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1347
  	rmap_item->address |= STABLE_FLAG;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1348
  	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
e178dfde3   Hugh Dickins   ksm: move pages_s...
1349

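  	/*
  	 * Accounting note (not in the original source): the first rmap_item
  	 * hung off a stable node is counted in pages_shared (one ksm page),
  	 * every further mapping of the same ksm page is counted in
  	 * pages_sharing.
  	 */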
  	if (rmap_item->hlist.next)
  		ksm_pages_sharing++;
  	else
  		ksm_pages_shared++;
  }
  
  /*
   * cmp_and_merge_page - first see if page can be merged into the stable tree;
   * if not, compare checksum to previous and if it's the same, see if page can
   * be inserted into the unstable tree, or merged with a page already there and
   * both transferred to the stable tree.
   *
   * @page: the page that we are searching identical page to.
   * @rmap_item: the reverse mapping into the virtual address of this page
   */
  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
  {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1367
  	struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1368
  	struct page *tree_page = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1369
  	struct stable_node *stable_node;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1370
  	struct page *kpage;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1371
1372
  	unsigned int checksum;
  	int err;
  	stable_node = page_stable_node(page);
  	if (stable_node) {
  		if (stable_node->head != &migrate_nodes &&
  		    get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
  			rb_erase(&stable_node->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1378
  				 root_stable_tree + NUMA(stable_node->nid));
  			stable_node->head = &migrate_nodes;
  			list_add(&stable_node->list, stable_node->head);
  		}
  		if (stable_node->head != &migrate_nodes &&
  		    rmap_item->head == stable_node)
  			return;
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1386
1387
  
  	/* We first start with searching the page inside the stable tree */
62b61f611   Hugh Dickins   ksm: memory hotre...
1388
  	kpage = stable_tree_search(page);
  	if (kpage == page && rmap_item->head == stable_node) {
  		put_page(kpage);
  		return;
  	}
  
  	remove_rmap_item_from_tree(rmap_item);
62b61f611   Hugh Dickins   ksm: memory hotre...
1395
  	if (kpage) {
08beca44d   Hugh Dickins   ksm: stable_node ...
1396
  		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
  		if (!err) {
  			/*
  			 * The page was successfully merged:
  			 * add its rmap_item to the stable tree.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1402
  			lock_page(kpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
1403
  			stable_tree_append(rmap_item, page_stable_node(kpage));
5ad646880   Hugh Dickins   ksm: let shared p...
1404
  			unlock_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1405
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1406
  		put_page(kpage);
  		return;
  	}
  
  	/*
  	 * If the hash value of the page has changed from the last time
  	 * we calculated it, this page is changing frequently: therefore we
  	 * don't want to insert it in the unstable tree, and we don't want
  	 * to waste our time searching for something identical to it there.
  	 */
  	checksum = calc_checksum(page);
  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return;
  	}
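  	/*
  	 * Explanatory note (not in the original source): only pages whose
  	 * checksum is unchanged since the previous scan get past this point,
  	 * i.e. their contents looked stable for at least one full scan
  	 * interval before we pay for the unstable tree search and merge.
  	 */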
  	tree_rmap_item =
  		unstable_tree_search_insert(rmap_item, page, &tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1423
  	if (tree_rmap_item) {
  		kpage = try_to_merge_two_pages(rmap_item, page,
  						tree_rmap_item, tree_page);
  		put_page(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1427
  		if (kpage) {
  			/*
  			 * The pages were successfully merged: insert new
  			 * node in the stable tree and add both rmap_items.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1432
  			lock_page(kpage);
  			stable_node = stable_tree_insert(kpage);
  			if (stable_node) {
  				stable_tree_append(tree_rmap_item, stable_node);
  				stable_tree_append(rmap_item, stable_node);
  			}
5ad646880   Hugh Dickins   ksm: let shared p...
1438
  			unlock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1439

  			/*
  			 * If we fail to insert the page into the stable tree,
  			 * we will have 2 virtual addresses that are pointing
  			 * to a ksm page left outside the stable tree,
  			 * in which case we need to break_cow on both.
  			 */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1446
  			if (!stable_node) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1447
1448
  				break_cow(tree_rmap_item);
  				break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1449
1450
  			}
  		}
  	}
  }
  
  static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
1455
  					    struct rmap_item **rmap_list,
  					    unsigned long addr)
  {
  	struct rmap_item *rmap_item;
6514d511d   Hugh Dickins   ksm: singly-linke...
1459
1460
  	while (*rmap_list) {
  		rmap_item = *rmap_list;
93d17715a   Hugh Dickins   ksm: three remove...
1461
  		if ((rmap_item->address & PAGE_MASK) == addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1462
  			return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1463
1464
  		if (rmap_item->address > addr)
  			break;
6514d511d   Hugh Dickins   ksm: singly-linke...
1465
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1466
  		remove_rmap_item_from_tree(rmap_item);
  		free_rmap_item(rmap_item);
  	}
  
  	rmap_item = alloc_rmap_item();
  	if (rmap_item) {
  		/* It has already been zeroed */
  		rmap_item->mm = mm_slot->mm;
  		rmap_item->address = addr;
6514d511d   Hugh Dickins   ksm: singly-linke...
1475
1476
  		rmap_item->rmap_list = *rmap_list;
  		*rmap_list = rmap_item;
  	}
  	return rmap_item;
  }
  
  static struct rmap_item *scan_get_next_rmap_item(struct page **page)
  {
  	struct mm_struct *mm;
  	struct mm_slot *slot;
  	struct vm_area_struct *vma;
  	struct rmap_item *rmap_item;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1487
  	int nid;
  
  	if (list_empty(&ksm_mm_head.mm_list))
  		return NULL;
  
  	slot = ksm_scan.mm_slot;
  	if (slot == &ksm_mm_head) {
  		/*
  		 * A number of pages can hang around indefinitely on per-cpu
  		 * pagevecs, raised page count preventing write_protect_page
  		 * from merging them.  Though it doesn't really matter much,
  		 * it is puzzling to see some stuck in pages_volatile until
  		 * other activity jostles them out, and they also prevented
  		 * LTP's KSM test from succeeding deterministically; so drain
  		 * them here (here rather than on entry to ksm_do_scan(),
  		 * so we don't IPI too often when pages_to_scan is set low).
  		 */
  		lru_add_drain_all();
  		/*
  		 * Whereas stale stable_nodes on the stable_tree itself
  		 * get pruned in the regular course of stable_tree_search(),
  		 * those moved out to the migrate_nodes list can accumulate:
  		 * so prune them once before each full scan.
  		 */
  		if (!ksm_merge_across_nodes) {
  			struct stable_node *stable_node;
  			struct list_head *this, *next;
  			struct page *page;
  
  			list_for_each_safe(this, next, &migrate_nodes) {
  				stable_node = list_entry(this,
  						struct stable_node, list);
  				page = get_ksm_page(stable_node, false);
  				if (page)
  					put_page(page);
  				cond_resched();
  			}
  		}
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1525
  		for (nid = 0; nid < ksm_nr_node_ids; nid++)
90bd6fd31   Petr Holasek   ksm: allow trees ...
1526
  			root_unstable_tree[nid] = RB_ROOT;
  
  		spin_lock(&ksm_mmlist_lock);
  		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
  		ksm_scan.mm_slot = slot;
  		spin_unlock(&ksm_mmlist_lock);
  		/*
  		 * Although we tested list_empty() above, a racing __ksm_exit
  		 * of the last mm on the list may have removed it since then.
  		 */
  		if (slot == &ksm_mm_head)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1538
1539
  next_mm:
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
1540
  		ksm_scan.rmap_list = &slot->rmap_list;
  	}
  
  	mm = slot->mm;
  	down_read(&mm->mmap_sem);
  	if (ksm_test_exit(mm))
  		vma = NULL;
  	else
  		vma = find_vma(mm, ksm_scan.address);
  
  	for (; vma; vma = vma->vm_next) {
  		if (!(vma->vm_flags & VM_MERGEABLE))
  			continue;
  		if (ksm_scan.address < vma->vm_start)
  			ksm_scan.address = vma->vm_start;
  		if (!vma->anon_vma)
  			ksm_scan.address = vma->vm_end;
  
  		while (ksm_scan.address < vma->vm_end) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1559
1560
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1561
  			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
  			if (IS_ERR_OR_NULL(*page)) {
  				ksm_scan.address += PAGE_SIZE;
  				cond_resched();
  				continue;
  			}
29ad768cf   Andrea Arcangeli   thp: KSM on THP
1567
1568
  			if (PageAnon(*page) ||
  			    page_trans_compound_anon(*page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1569
1570
1571
  				flush_anon_page(vma, *page, ksm_scan.address);
  				flush_dcache_page(*page);
  				rmap_item = get_next_rmap_item(slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
1572
  					ksm_scan.rmap_list, ksm_scan.address);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1573
  				if (rmap_item) {
6514d511d   Hugh Dickins   ksm: singly-linke...
1574
1575
  					ksm_scan.rmap_list =
  							&rmap_item->rmap_list;
  					ksm_scan.address += PAGE_SIZE;
  				} else
  					put_page(*page);
  				up_read(&mm->mmap_sem);
  				return rmap_item;
  			}
21ae5b017   Andrea Arcangeli   thp: skip transhu...
1582
  			put_page(*page);
  			ksm_scan.address += PAGE_SIZE;
  			cond_resched();
  		}
  	}
9ba692948   Hugh Dickins   ksm: fix oom dead...
1587
1588
  	if (ksm_test_exit(mm)) {
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
1589
  		ksm_scan.rmap_list = &slot->rmap_list;
9ba692948   Hugh Dickins   ksm: fix oom dead...
1590
  	}
  	/*
  	 * Nuke all the rmap_items that are above this current rmap:
  	 * because there were no VM_MERGEABLE vmas with such addresses.
  	 */
6514d511d   Hugh Dickins   ksm: singly-linke...
1595
  	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1596
1597
  
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
  						struct mm_slot, mm_list);
  	if (ksm_scan.address == 0) {
  		/*
  		 * We've completed a full scan of all vmas, holding mmap_sem
  		 * throughout, and found no VM_MERGEABLE: so do the same as
  		 * __ksm_exit does to remove this mm from all our lists now.
  		 * This applies either when cleaning up after __ksm_exit
  		 * (but beware: we can reach here even before __ksm_exit),
  		 * or when all VM_MERGEABLE areas have been unmapped (and
  		 * mmap_sem then protects against race with MADV_MERGEABLE).
cd551f975   Hugh Dickins   ksm: distribute r...
1609
  		 */
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
1610
  		hash_del(&slot->link);
cd551f975   Hugh Dickins   ksm: distribute r...
1611
  		list_del(&slot->mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1612
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
1613
1614
  		free_mm_slot(slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		up_read(&mm->mmap_sem);
  		mmdrop(mm);
  	} else {
  		spin_unlock(&ksm_mmlist_lock);
  		up_read(&mm->mmap_sem);
cd551f975   Hugh Dickins   ksm: distribute r...
1620
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1621
1622
  
  	/* Repeat until we've completed scanning the whole list */
cd551f975   Hugh Dickins   ksm: distribute r...
1623
  	slot = ksm_scan.mm_slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1624
1625
  	if (slot != &ksm_mm_head)
  		goto next_mm;
  	ksm_scan.seqnr++;
  	return NULL;
  }
  
  /**
   * ksm_do_scan  - the ksm scanner main worker function.
   * @scan_npages - number of pages we want to scan before we return.
   */
  static void ksm_do_scan(unsigned int scan_npages)
  {
  	struct rmap_item *rmap_item;
22eccdd7d   Dan Carpenter   ksm: check for ER...
1637
  	struct page *uninitialized_var(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1638

878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1639
  	while (scan_npages-- && likely(!freezing(current))) {
  		cond_resched();
  		rmap_item = scan_get_next_rmap_item(&page);
  		if (!rmap_item)
  			return;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1644
  		cmp_and_merge_page(page, rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1645
1646
1647
  		put_page(page);
  	}
  }
  static int ksmd_should_run(void)
  {
  	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1652
1653
  static int ksm_scan_thread(void *nothing)
  {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1654
  	set_freezable();
339aa6246   Izik Eidus   ksm: change ksm n...
1655
  	set_user_nice(current, 5);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1656
1657
  
  	while (!kthread_should_stop()) {
6e1583842   Hugh Dickins   ksm: keep quiet w...
1658
  		mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1659
  		wait_while_offlining();
6e1583842   Hugh Dickins   ksm: keep quiet w...
1660
  		if (ksmd_should_run())
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1661
  			ksm_do_scan(ksm_thread_pages_to_scan);
6e1583842   Hugh Dickins   ksm: keep quiet w...
1662
  		mutex_unlock(&ksm_thread_mutex);
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1663
  		try_to_freeze();
6e1583842   Hugh Dickins   ksm: keep quiet w...
1664
  		if (ksmd_should_run()) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1665
1666
1667
  			schedule_timeout_interruptible(
  				msecs_to_jiffies(ksm_thread_sleep_millisecs));
  		} else {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1668
  			wait_event_freezable(ksm_thread_wait,
6e1583842   Hugh Dickins   ksm: keep quiet w...
1669
  				ksmd_should_run() || kthread_should_stop());
  		}
  	}
  	return 0;
  }
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  		unsigned long end, int advice, unsigned long *vm_flags)
  {
  	struct mm_struct *mm = vma->vm_mm;
d952b7913   Hugh Dickins   ksm: fix endless ...
1678
  	int err;
  
  	switch (advice) {
  	case MADV_MERGEABLE:
  		/*
  		 * Be somewhat over-protective for now!
  		 */
  		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
  				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
0661a3361   Kirill A. Shutemov   mm: remove rest u...
1687
  				 VM_HUGETLB | VM_MIXEDMAP))
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1688
  			return 0;		/* just ignore the advice */
  #ifdef VM_SAO
  		if (*vm_flags & VM_SAO)
  			return 0;
  #endif
  		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
  			err = __ksm_enter(mm);
  			if (err)
  				return err;
  		}
  
  		*vm_flags |= VM_MERGEABLE;
  		break;
  
  	case MADV_UNMERGEABLE:
  		if (!(*vm_flags & VM_MERGEABLE))
  			return 0;		/* just ignore the advice */
  		if (vma->anon_vma) {
  			err = unmerge_ksm_pages(vma, start, end);
  			if (err)
  				return err;
  		}
  
  		*vm_flags &= ~VM_MERGEABLE;
  		break;
  	}
  
  	return 0;
  }
  
  int __ksm_enter(struct mm_struct *mm)
  {
  	struct mm_slot *mm_slot;
  	int needs_wakeup;
  
  	mm_slot = alloc_mm_slot();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1724
1725
  	if (!mm_slot)
  		return -ENOMEM;
6e1583842   Hugh Dickins   ksm: keep quiet w...
1726
1727
  	/* Check ksm_run too?  Would need tighter locking */
  	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1728
1729
1730
  	spin_lock(&ksm_mmlist_lock);
  	insert_to_mm_slots_hash(mm, mm_slot);
  	/*
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1731
1732
  	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
  	 * insert just behind the scanning cursor, to let the area settle
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1733
1734
  	 * down a little; when fork is followed by immediate exec, we don't
  	 * want ksmd to waste time setting up and tearing down an rmap_list.
  	 *
  	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
  	 * scanning cursor, otherwise KSM pages in newly forked mms will be
  	 * missed: then we might as well insert at the end of the list.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1739
  	 */
  	if (ksm_run & KSM_RUN_UNMERGE)
  		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
  	else
  		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1744
  	spin_unlock(&ksm_mmlist_lock);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1745
  	set_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1746
  	atomic_inc(&mm->mm_count);
6e1583842   Hugh Dickins   ksm: keep quiet w...
1747
1748
1749
  
  	if (needs_wakeup)
  		wake_up_interruptible(&ksm_thread_wait);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1750
1751
  	return 0;
  }
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
1752
  void __ksm_exit(struct mm_struct *mm)
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1753
  {
cd551f975   Hugh Dickins   ksm: distribute r...
1754
  	struct mm_slot *mm_slot;
9ba692948   Hugh Dickins   ksm: fix oom dead...
1755
  	int easy_to_free = 0;
cd551f975   Hugh Dickins   ksm: distribute r...
1756

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1757
  	/*
  	 * This process is exiting: if it's straightforward (as is the
  	 * case when ksmd was never running), free mm_slot immediately.
  	 * But if it's at the cursor or has rmap_items linked to it, use
  	 * mmap_sem to synchronize with any break_cows before pagetables
  	 * are freed, and leave the mm_slot on the list for ksmd to free.
  	 * Beware: ksm may already have noticed it exiting and freed the slot.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1764
  	 */
9ba692948   Hugh Dickins   ksm: fix oom dead...
1765

cd551f975   Hugh Dickins   ksm: distribute r...
1766
1767
  	spin_lock(&ksm_mmlist_lock);
  	mm_slot = get_mm_slot(mm);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1768
  	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
6514d511d   Hugh Dickins   ksm: singly-linke...
1769
  		if (!mm_slot->rmap_list) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
1770
  			hash_del(&mm_slot->link);
  			list_del(&mm_slot->mm_list);
  			easy_to_free = 1;
  		} else {
  			list_move(&mm_slot->mm_list,
  				  &ksm_scan.mm_slot->mm_list);
  		}
cd551f975   Hugh Dickins   ksm: distribute r...
1777
  	}
cd551f975   Hugh Dickins   ksm: distribute r...
1778
  	spin_unlock(&ksm_mmlist_lock);
  	if (easy_to_free) {
  		free_mm_slot(mm_slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		mmdrop(mm);
  	} else if (mm_slot) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1784
1785
  		down_write(&mm->mmap_sem);
  		up_write(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1786
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1787
  }
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1788
  struct page *ksm_might_need_to_copy(struct page *page,
5ad646880   Hugh Dickins   ksm: let shared p...
1789
1790
  			struct vm_area_struct *vma, unsigned long address)
  {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1791
  	struct anon_vma *anon_vma = page_anon_vma(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1792
  	struct page *new_page;
  	if (PageKsm(page)) {
  		if (page_stable_node(page) &&
  		    !(ksm_run & KSM_RUN_UNMERGE))
  			return page;	/* no need to copy it */
  	} else if (!anon_vma) {
  		return page;		/* no need to copy it */
  	} else if (anon_vma->root == vma->anon_vma->root &&
  		 page->index == linear_page_index(vma, address)) {
  		return page;		/* still no need to copy it */
  	}
  	if (!PageUptodate(page))
  		return page;		/* let do_swap_page report the error */
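  	/*
  	 * Explanatory note (not in the original source): falling through
  	 * means the page read back from swap is (or recently was) a ksm page
  	 * shared beyond this vma, so give this mm its own private copy
  	 * rather than handing back the shared page.
  	 */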
  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
  	if (new_page) {
  		copy_user_highpage(new_page, page, address, vma);
  
  		SetPageDirty(new_page);
  		__SetPageUptodate(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
1811
  		__set_page_locked(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
1812
  	}
5ad646880   Hugh Dickins   ksm: let shared p...
1813
1814
  	return new_page;
  }
051ac83ad   Joonsoo Kim   mm/rmap: make rma...
1815
  int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1816
1817
  {
  	struct stable_node *stable_node;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1818
1819
1820
  	struct rmap_item *rmap_item;
  	int ret = SWAP_AGAIN;
  	int search_new_forks = 0;
309381fea   Sasha Levin   mm: dump page whe...
1821
  	VM_BUG_ON_PAGE(!PageKsm(page), page);
  
  	/*
  	 * Rely on the page lock to protect against concurrent modifications
  	 * to that page's node of the stable tree.
  	 */
309381fea   Sasha Levin   mm: dump page whe...
1827
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	stable_node = page_stable_node(page);
  	if (!stable_node)
  		return ret;
  again:
b67bfe0d4   Sasha Levin   hlist: drop the n...
1833
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1834
  		struct anon_vma *anon_vma = rmap_item->anon_vma;
5beb49305   Rik van Riel   mm: change anon_v...
1835
  		struct anon_vma_chain *vmac;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1836
  		struct vm_area_struct *vma;
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1837
  		anon_vma_lock_read(anon_vma);
bf181b9f9   Michel Lespinasse   mm anon rmap: rep...
1838
1839
  		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
  					       0, ULONG_MAX) {
5beb49305   Rik van Riel   mm: change anon_v...
1840
  			vma = vmac->vma;
  			if (rmap_item->address < vma->vm_start ||
  			    rmap_item->address >= vma->vm_end)
  				continue;
  			/*
  			 * Initially we examine only the vma which covers this
  			 * rmap_item; but later, if there is still work to do,
  			 * we examine covering vmas in other mms: in case they
  			 * were forked from the original since ksmd passed.
  			 */
  			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
  				continue;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1852
1853
  			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  				continue;
051ac83ad   Joonsoo Kim   mm/rmap: make rma...
1854
1855
  			ret = rwc->rmap_one(page, vma,
  					rmap_item->address, rwc->arg);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1856
  			if (ret != SWAP_AGAIN) {
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1857
  				anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1858
1859
  				goto out;
  			}
  			if (rwc->done && rwc->done(page)) {
  				anon_vma_unlock_read(anon_vma);
  				goto out;
  			}
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1864
  		}
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1865
  		anon_vma_unlock_read(anon_vma);
  	}
  	if (!search_new_forks++)
  		goto again;
  out:
  	return ret;
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
1872
  #ifdef CONFIG_MIGRATION
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1873
1874
1875
  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
  {
  	struct stable_node *stable_node;
309381fea   Sasha Levin   mm: dump page whe...
1876
1877
1878
  	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
  	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
  	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1879
1880
1881
  
  	stable_node = page_stable_node(newpage);
  	if (stable_node) {
309381fea   Sasha Levin   mm: dump page whe...
1882
  		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
1883
  		stable_node->kpfn = page_to_pfn(newpage);
  		/*
  		 * newpage->mapping was set in advance; now we need smp_wmb()
  		 * to make sure that the new stable_node->kpfn is visible
  		 * to get_ksm_page() before it can see that oldpage->mapping
  		 * has gone stale (or that PageSwapCache has been cleared).
  		 */
  		smp_wmb();
  		set_page_stable_node(oldpage, NULL);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1892
1893
1894
  	}
  }
  #endif /* CONFIG_MIGRATION */
62b61f611   Hugh Dickins   ksm: memory hotre...
1895
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static void wait_while_offlining(void)
  {
  	while (ksm_run & KSM_RUN_OFFLINE) {
  		mutex_unlock(&ksm_thread_mutex);
  		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
743162013   NeilBrown   sched: Remove pro...
1901
  			    TASK_UNINTERRUPTIBLE);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1902
1903
1904
  		mutex_lock(&ksm_thread_mutex);
  	}
  }
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1905
1906
  static void ksm_check_stable_tree(unsigned long start_pfn,
  				  unsigned long end_pfn)
62b61f611   Hugh Dickins   ksm: memory hotre...
1907
  {
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1908
  	struct stable_node *stable_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1909
  	struct list_head *this, *next;
62b61f611   Hugh Dickins   ksm: memory hotre...
1910
  	struct rb_node *node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1911
  	int nid;
62b61f611   Hugh Dickins   ksm: memory hotre...
1912

ef53d16cd   Hugh Dickins   ksm: allocate roo...
1913
1914
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1915
  		while (node) {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1916
1917
  			stable_node = rb_entry(node, struct stable_node, node);
  			if (stable_node->kpfn >= start_pfn &&
  			    stable_node->kpfn < end_pfn) {
  				/*
  				 * Don't get_ksm_page, page has already gone:
  				 * which is why we keep kpfn instead of page*
  				 */
  				remove_node_from_stable_tree(stable_node);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1924
  				node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1925
1926
1927
  			} else
  				node = rb_next(node);
  			cond_resched();
90bd6fd31   Petr Holasek   ksm: allow trees ...
1928
  		}
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1929
  	}
  	list_for_each_safe(this, next, &migrate_nodes) {
  		stable_node = list_entry(this, struct stable_node, list);
  		if (stable_node->kpfn >= start_pfn &&
  		    stable_node->kpfn < end_pfn)
  			remove_node_from_stable_tree(stable_node);
  		cond_resched();
  	}
  }
  
  static int ksm_memory_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
  
  	switch (action) {
  	case MEM_GOING_OFFLINE:
  		/*
  		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
  		 * and remove_all_stable_nodes() while memory is going offline:
  		 * it is unsafe for them to touch the stable tree at this time.
  		 * But unmerge_ksm_pages(), rmap lookups and other entry points
  		 * which do not need the ksm_thread_mutex are all safe.
62b61f611   Hugh Dickins   ksm: memory hotre...
1952
  		 */
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1953
1954
1955
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run |= KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
  		break;
  
  	case MEM_OFFLINE:
  		/*
  		 * Most of the work is done by page migration; but there might
  		 * be a few stable_nodes left over, still pointing to struct
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1962
1963
1964
  		 * pages which have been offlined: prune those from the tree,
  		 * otherwise get_ksm_page() might later try to access a
  		 * non-existent struct page.
62b61f611   Hugh Dickins   ksm: memory hotre...
1965
  		 */
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1966
1967
  		ksm_check_stable_tree(mn->start_pfn,
  				      mn->start_pfn + mn->nr_pages);
62b61f611   Hugh Dickins   ksm: memory hotre...
1968
1969
1970
  		/* fallthrough */
  
  	case MEM_CANCEL_OFFLINE:
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1971
1972
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run &= ~KSM_RUN_OFFLINE;
62b61f611   Hugh Dickins   ksm: memory hotre...
1973
  		mutex_unlock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1974
1975
1976
  
  		smp_mb();	/* wake_up_bit advises this */
  		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
  		break;
  	}
  	return NOTIFY_OK;
  }
  #else
  static void wait_while_offlining(void)
  {
  }
62b61f611   Hugh Dickins   ksm: memory hotre...
1985
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  #ifdef CONFIG_SYSFS
  /*
   * This all compiles without CONFIG_SYSFS, but is a waste of space.
   */
  #define KSM_ATTR_RO(_name) \
  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KSM_ATTR(_name) \
  	static struct kobj_attribute _name##_attr = \
  		__ATTR(_name, 0644, _name##_show, _name##_store)
  
  static ssize_t sleep_millisecs_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
  }
  
  static ssize_t sleep_millisecs_store(struct kobject *kobj,
  				     struct kobj_attribute *attr,
  				     const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;
3dbb95f78   Jingoo Han   mm: replace stric...
2009
  	err = kstrtoul(buf, 10, &msecs);
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_sleep_millisecs = msecs;
  
  	return count;
  }
  KSM_ATTR(sleep_millisecs);
  
  static ssize_t pages_to_scan_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
  }
  
  static ssize_t pages_to_scan_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long nr_pages;
3dbb95f78   Jingoo Han   mm: replace stric...
2032
  	err = kstrtoul(buf, 10, &nr_pages);
  	if (err || nr_pages > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_pages_to_scan = nr_pages;
  
  	return count;
  }
  KSM_ATTR(pages_to_scan);
  
  static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
  			char *buf)
  {
  	return sprintf(buf, "%lu\n", ksm_run);
  }
  
  static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
  			 const char *buf, size_t count)
  {
  	int err;
  	unsigned long flags;
3dbb95f78   Jingoo Han   mm: replace stric...
2054
  	err = kstrtoul(buf, 10, &flags);
  	if (err || flags > UINT_MAX)
  		return -EINVAL;
  	if (flags > KSM_RUN_UNMERGE)
  		return -EINVAL;
  
  	/*
  	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
  	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
d0f209f68   Hugh Dickins   ksm: remove unswa...
2063
2064
  	 * breaking COW to free the pages_shared (but leaves mm_slots
  	 * on the list for when ksmd may be set running again).
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2065
2066
2067
  	 */
  
  	mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2068
  	wait_while_offlining();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2069
2070
  	if (ksm_run != flags) {
  		ksm_run = flags;
d952b7913   Hugh Dickins   ksm: fix endless ...
2071
  		if (flags & KSM_RUN_UNMERGE) {
e1e12d2f3   David Rientjes   mm, oom: fix race...
2072
  			set_current_oom_origin();
d952b7913   Hugh Dickins   ksm: fix endless ...
2073
  			err = unmerge_and_remove_all_rmap_items();
e1e12d2f3   David Rientjes   mm, oom: fix race...
2074
  			clear_current_oom_origin();
  			if (err) {
  				ksm_run = KSM_RUN_STOP;
  				count = err;
  			}
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	if (flags & KSM_RUN_MERGE)
  		wake_up_interruptible(&ksm_thread_wait);
  
  	return count;
  }
  KSM_ATTR(run);
  #ifdef CONFIG_NUMA
  static ssize_t merge_across_nodes_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u\n", ksm_merge_across_nodes);
  }
  
  static ssize_t merge_across_nodes_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long knob;
  
  	err = kstrtoul(buf, 10, &knob);
  	if (err)
  		return err;
  	if (knob > 1)
  		return -EINVAL;
  
  	mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2111
  	wait_while_offlining();
90bd6fd31   Petr Holasek   ksm: allow trees ...
2112
  	if (ksm_merge_across_nodes != knob) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2113
  		if (ksm_pages_shared || remove_all_stable_nodes())
90bd6fd31   Petr Holasek   ksm: allow trees ...
2114
  			err = -EBUSY;
  		else if (root_stable_tree == one_stable_tree) {
  			struct rb_root *buf;
  			/*
  			 * This is the first time that we switch away from the
  			 * default of merging across nodes: must now allocate
  			 * a buffer to hold as many roots as may be needed.
  			 * Allocate stable and unstable together:
  			 * MAXSMP NODES_SHIFT 10 will use 16kB.
  			 */
bafe1e144   Joe Perches   ksm: remove redun...
2124
2125
  			buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf),
  				      GFP_KERNEL);
  			/* Let us assume that RB_ROOT is NULL is zero */
  			if (!buf)
  				err = -ENOMEM;
  			else {
  				root_stable_tree = buf;
  				root_unstable_tree = buf + nr_node_ids;
  				/* Stable tree is empty but not the unstable */
  				root_unstable_tree[0] = one_unstable_tree[0];
  			}
  		}
  		if (!err) {
			ksm_merge_across_nodes = knob;
  			ksm_nr_node_ids = knob ? 1 : nr_node_ids;
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	return err ? err : count;
  }
  KSM_ATTR(merge_across_nodes);
  #endif
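
/*
 * Illustrative sketch, not part of ksm.c: merge_across_nodes_store() above
 * refuses the change with -EBUSY while any ksm pages exist, so userspace has
 * to unmerge everything first.  A minimal sequence, assuming the standard
 * sysfs paths and a hypothetical ksm_write() helper:
 *
 *	#include <stdio.h>
 *
 *	static int ksm_write(const char *name, const char *val)
 *	{
 *		char path[128];
 *		FILE *f;
 *
 *		snprintf(path, sizeof(path), "/sys/kernel/mm/ksm/%s", name);
 *		f = fopen(path, "w");
 *		if (!f)
 *			return -1;
 *		fputs(val, f);
 *		return fclose(f);
 *	}
 *
 *	ksm_write("run", "2");			// KSM_RUN_UNMERGE: drop ksm pages
 *	ksm_write("merge_across_nodes", "0");	// now allowed: one tree per node
 *	ksm_write("run", "1");			// KSM_RUN_MERGE: restart ksmd
 */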

static ssize_t pages_shared_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_shared);
}
KSM_ATTR_RO(pages_shared);

static ssize_t pages_sharing_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", ksm_pages_sharing);
  }
  KSM_ATTR_RO(pages_sharing);
  static ssize_t pages_unshared_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
	return sprintf(buf, "%lu\n", ksm_pages_unshared);
  }
  KSM_ATTR_RO(pages_unshared);
  
  static ssize_t pages_volatile_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	long ksm_pages_volatile;
  
  	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
  				- ksm_pages_sharing - ksm_pages_unshared;
  	/*
  	 * It was not worth any locking to calculate that statistic,
  	 * but it might therefore sometimes be negative: conceal that.
  	 */
  	if (ksm_pages_volatile < 0)
  		ksm_pages_volatile = 0;
	return sprintf(buf, "%ld\n", ksm_pages_volatile);
  }
  KSM_ATTR_RO(pages_volatile);
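
/*
 * Illustrative note, not part of ksm.c: each rmap_item tracks one scanned
 * page, and (approximately, since no lock is taken above) every rmap_item
 * falls into one of the buckets exported here, which is why pages_volatile
 * can be derived by subtraction:
 *
 *	pages_volatile = rmap_items - pages_shared - pages_sharing
 *					- pages_unshared
 *
 * Worked example: 1000 identical pages merged into a single ksm page show
 * up as pages_shared = 1 and pages_sharing = 999, so a high
 * pages_sharing/pages_shared ratio indicates effective deduplication.
 */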
  
  static ssize_t full_scans_show(struct kobject *kobj,
  			       struct kobj_attribute *attr, char *buf)
  {
	return sprintf(buf, "%lu\n", ksm_scan.seqnr);
  }
  KSM_ATTR_RO(full_scans);
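
/*
 * Illustrative sketch, not part of ksm.c: ksm_scan.seqnr advances once per
 * complete pass over all mergeable areas, so userspace can wait for the
 * statistics above to settle by polling full_scans.  A minimal sketch,
 * assuming the standard sysfs path:
 *
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static unsigned long read_full_scans(void)
 *	{
 *		unsigned long n = 0;
 *		FILE *f = fopen("/sys/kernel/mm/ksm/full_scans", "r");
 *
 *		if (f) {
 *			fscanf(f, "%lu", &n);
 *			fclose(f);
 *		}
 *		return n;
 *	}
 *
 *	// wait until at least one more full scan has completed
 *	unsigned long start = read_full_scans();
 *	while (read_full_scans() <= start)
 *		sleep(1);
 */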

static struct attribute *ksm_attrs[] = {
	&sleep_millisecs_attr.attr,
	&pages_to_scan_attr.attr,
	&run_attr.attr,
	&pages_shared_attr.attr,
	&pages_sharing_attr.attr,
	&pages_unshared_attr.attr,
	&pages_volatile_attr.attr,
	&full_scans_attr.attr,
#ifdef CONFIG_NUMA
	&merge_across_nodes_attr.attr,
#endif
  	NULL,
  };
  
  static struct attribute_group ksm_attr_group = {
  	.attrs = ksm_attrs,
  	.name = "ksm",
  };
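
/*
 * Illustrative note, not part of ksm.c: the xxx_attr names collected above
 * come from the KSM_ATTR()/KSM_ATTR_RO() wrappers used earlier in this file,
 * which follow the usual kobj_attribute pattern; roughly (a sketch, not the
 * verbatim macros):
 *
 *	#define KSM_ATTR_RO(_name) \
 *		static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 *	#define KSM_ATTR(_name) \
 *		static struct kobj_attribute _name##_attr = \
 *			__ATTR(_name, 0644, _name##_show, _name##_store)
 *
 * so &run_attr.attr wires the "run" sysfs file to run_show()/run_store().
 */
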
  #endif /* CONFIG_SYSFS */
  
  static int __init ksm_init(void)
  {
  	struct task_struct *ksm_thread;
  	int err;
  
  	err = ksm_slab_init();
  	if (err)
  		goto out;

	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
	if (IS_ERR(ksm_thread)) {
		pr_err("ksm: creating kthread failed\n");
		err = PTR_ERR(ksm_thread);
		goto out_free;
	}

#ifdef CONFIG_SYSFS
	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
	if (err) {
		pr_err("ksm: register sysfs failed\n");
		kthread_stop(ksm_thread);
		goto out_free;
	}
#else
	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */

#endif /* CONFIG_SYSFS */

#ifdef CONFIG_MEMORY_HOTREMOVE
	/* There is no significance to this priority 100 */
	hotplug_memory_notifier(ksm_memory_callback, 100);
#endif
	return 0;

out_free:
	ksm_slab_free();
out:
	return err;
}
  subsys_initcall(ksm_init);
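
/*
 * Illustrative sketch, not part of ksm.c: once ksm_init() has started ksmd,
 * pages only become candidates for merging after an application opts a
 * region in with madvise(MADV_MERGEABLE).  A minimal, self-contained
 * userspace example (error handling abbreviated):
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/mman.h>
 *
 *	int main(void)
 *	{
 *		size_t len = 64 << 20;	// 64MB of identical pages
 *		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		if (buf == MAP_FAILED)
 *			return 1;
 *		memset(buf, 0x5a, len);			// duplicate content
 *		if (madvise(buf, len, MADV_MERGEABLE))	// register with KSM
 *			return 1;
 *		pause();			// let ksmd scan and merge
 *		return 0;
 *	}
 */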