Blame view

mm/ksm.c 63.4 KB
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2
3
4
5
6
   * Memory merging support.
   *
   * This code enables dynamic sharing of identical pages found in different
   * memory areas, even if they are not shared by fork()
   *
36b2528dc   Izik Eidus   ksm: change copyr...
7
   * Copyright (C) 2008-2009 Red Hat, Inc.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
8
9
10
11
   * Authors:
   *	Izik Eidus
   *	Andrea Arcangeli
   *	Chris Wright
36b2528dc   Izik Eidus   ksm: change copyr...
12
   *	Hugh Dickins
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
13
14
   *
   * This work is licensed under the terms of the GNU GPL, version 2.
f8af4da3b   Hugh Dickins   ksm: the mm inter...
15
16
17
   */
  
  #include <linux/errno.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
18
19
  #include <linux/mm.h>
  #include <linux/fs.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
20
  #include <linux/mman.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
21
22
23
24
25
26
27
28
29
30
31
  #include <linux/sched.h>
  #include <linux/rwsem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/spinlock.h>
  #include <linux/jhash.h>
  #include <linux/delay.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>
  #include <linux/slab.h>
  #include <linux/rbtree.h>
62b61f611   Hugh Dickins   ksm: memory hotre...
32
  #include <linux/memory.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
33
  #include <linux/mmu_notifier.h>
2c6854fda   Izik Eidus   ksm: change defau...
34
  #include <linux/swap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
35
  #include <linux/ksm.h>
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
36
  #include <linux/hashtable.h>
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
37
  #include <linux/freezer.h>
72788c385   David Rientjes   oom: replace PF_O...
38
  #include <linux/oom.h>
90bd6fd31   Petr Holasek   ksm: allow trees ...
39
  #include <linux/numa.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
40

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
41
  #include <asm/tlbflush.h>
73848b468   Hugh Dickins   ksm: fix mlockfre...
42
  #include "internal.h"
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
43

e850dcf53   Hugh Dickins   ksm: trivial tidyups
44
45
46
47
48
49
50
  #ifdef CONFIG_NUMA
  #define NUMA(x)		(x)
  #define DO_NUMA(x)	do { (x); } while (0)
  #else
  #define NUMA(x)		(0)
  #define DO_NUMA(x)	do { } while (0)
  #endif
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
  /*
   * A few notes about the KSM scanning process,
   * to make it easier to understand the data structures below:
   *
   * In order to reduce excessive scanning, KSM sorts the memory pages by their
   * contents into a data structure that holds pointers to the pages' locations.
   *
   * Since the contents of the pages may change at any moment, KSM cannot just
   * insert the pages into a normal sorted tree and expect it to find anything.
   * Therefore KSM uses two data structures - the stable and the unstable tree.
   *
   * The stable tree holds pointers to all the merged pages (ksm pages), sorted
   * by their contents.  Because each such page is write-protected, searching on
   * this tree is fully assured to be working (except when pages are unmapped),
   * and therefore this tree is called the stable tree.
   *
   * In addition to the stable tree, KSM uses a second data structure called the
   * unstable tree: this tree holds pointers to pages which have been found to
   * be "unchanged for a period of time".  The unstable tree sorts these pages
   * by their contents, but since they are not write-protected, KSM cannot rely
   * upon the unstable tree to work correctly - the unstable tree is liable to
   * be corrupted as its contents are modified, and so it is called unstable.
   *
   * KSM solves this problem by several techniques:
   *
   * 1) The unstable tree is flushed every time KSM completes scanning all
   *    memory areas, and then the tree is rebuilt again from the beginning.
   * 2) KSM will only insert into the unstable tree, pages whose hash value
   *    has not changed since the previous scan of all memory areas.
   * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the
   *    colors of the nodes and not on their contents, assuring that even when
   *    the tree gets "corrupted" it won't get out of balance, so scanning time
   *    remains the same (also, searching and inserting nodes in an rbtree uses
   *    the same algorithm, so we have no overhead when we flush and rebuild).
   * 4) KSM never flushes the stable tree, which means that even if it were to
   *    take 10 attempts to find a page in the unstable tree, once it is found,
   *    it is secured in the stable tree.  (When we scan a new page, we first
   *    compare it against the stable tree, and then against the unstable tree.)
8fdb3dbf0   Hugh Dickins   ksm: add some com...
89
90
91
   *
   * If the merge_across_nodes tunable is unset, then KSM maintains multiple
   * stable trees and multiple unstable trees: one of each for each NUMA node.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
92
93
94
95
96
97
   */
  
  /**
   * struct mm_slot - ksm information per mm that is being scanned
   * @link: link to the mm_slots hash list
   * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
6514d511d   Hugh Dickins   ksm: singly-linke...
98
   * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
99
100
101
102
103
   * @mm: the mm that this information is valid for
   */
  struct mm_slot {
  	struct hlist_node link;
  	struct list_head mm_list;
6514d511d   Hugh Dickins   ksm: singly-linke...
104
  	struct rmap_item *rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
105
106
107
108
109
110
111
  	struct mm_struct *mm;
  };
  
  /**
   * struct ksm_scan - cursor for scanning
   * @mm_slot: the current mm_slot we are scanning
   * @address: the next address inside that to be scanned
6514d511d   Hugh Dickins   ksm: singly-linke...
112
   * @rmap_list: link to the next rmap to be scanned in the rmap_list
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
113
114
115
116
117
118
119
   * @seqnr: count of completed full scans (needed when removing unstable node)
   *
   * There is only the one ksm_scan instance of this cursor structure.
   */
  struct ksm_scan {
  	struct mm_slot *mm_slot;
  	unsigned long address;
6514d511d   Hugh Dickins   ksm: singly-linke...
120
  	struct rmap_item **rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
121
122
123
124
  	unsigned long seqnr;
  };
  
  /**
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
125
126
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
4146d2d67   Hugh Dickins   ksm: make !merge_...
127
128
   * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
   * @list: linked into migrate_nodes, pending placement in the proper node tree
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
129
   * @hlist: hlist head of rmap_items using this ksm page
4146d2d67   Hugh Dickins   ksm: make !merge_...
130
131
   * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
   * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
132
133
   */
  struct stable_node {
4146d2d67   Hugh Dickins   ksm: make !merge_...
134
135
136
137
138
139
140
  	union {
  		struct rb_node node;	/* when node of stable tree */
  		struct {		/* when listed for migration */
  			struct list_head *head;
  			struct list_head list;
  		};
  	};
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
141
  	struct hlist_head hlist;
62b61f611   Hugh Dickins   ksm: memory hotre...
142
  	unsigned long kpfn;
4146d2d67   Hugh Dickins   ksm: make !merge_...
143
144
145
  #ifdef CONFIG_NUMA
  	int nid;
  #endif
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
146
147
148
  };
  
  /**
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
149
   * struct rmap_item - reverse mapping item for virtual addresses
6514d511d   Hugh Dickins   ksm: singly-linke...
150
   * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
db114b83a   Hugh Dickins   ksm: hold anon_vm...
151
   * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
152
   * @nid: NUMA node id of unstable tree in which linked (may not match page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
153
154
155
   * @mm: the memory structure this rmap_item is pointing into
   * @address: the virtual address this rmap_item tracks (+ flags in low bits)
   * @oldchecksum: previous checksum of the page at that virtual address
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
156
157
158
   * @node: rb node of this rmap_item in the unstable tree
   * @head: pointer to stable_node heading this list in the stable tree
   * @hlist: link into hlist of rmap_items hanging off that stable_node
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
159
160
   */
  struct rmap_item {
6514d511d   Hugh Dickins   ksm: singly-linke...
161
  	struct rmap_item *rmap_list;
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
162
163
164
165
166
167
  	union {
  		struct anon_vma *anon_vma;	/* when stable */
  #ifdef CONFIG_NUMA
  		int nid;		/* when node of unstable tree */
  #endif
  	};
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
168
169
  	struct mm_struct *mm;
  	unsigned long address;		/* + low bits used for flags below */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
170
  	unsigned int oldchecksum;	/* when unstable */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
171
  	union {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
172
173
174
175
176
  		struct rb_node node;	/* when node of unstable tree */
  		struct {		/* when listed from stable tree */
  			struct stable_node *head;
  			struct hlist_node hlist;
  		};
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
177
178
179
180
  	};
  };
  
  #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
181
182
  #define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
  #define STABLE_FLAG	0x200	/* is listed from the stable tree */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
183
184
  
  /* The stable and unstable tree heads */
ef53d16cd   Hugh Dickins   ksm: allocate roo...
185
186
187
188
  static struct rb_root one_stable_tree[1] = { RB_ROOT };
  static struct rb_root one_unstable_tree[1] = { RB_ROOT };
  static struct rb_root *root_stable_tree = one_stable_tree;
  static struct rb_root *root_unstable_tree = one_unstable_tree;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
189

4146d2d67   Hugh Dickins   ksm: make !merge_...
190
191
  /* Recently migrated nodes of stable tree, pending proper placement */
  static LIST_HEAD(migrate_nodes);
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
192
193
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
194
195
196
197
198
199
200
201
202
  
  static struct mm_slot ksm_mm_head = {
  	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
  };
  static struct ksm_scan ksm_scan = {
  	.mm_slot = &ksm_mm_head,
  };
  
  static struct kmem_cache *rmap_item_cache;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
203
  static struct kmem_cache *stable_node_cache;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
204
205
206
  static struct kmem_cache *mm_slot_cache;
  
  /* The number of nodes in the stable tree */
b40282603   Hugh Dickins   ksm: rename kerne...
207
  static unsigned long ksm_pages_shared;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
208

e178dfde3   Hugh Dickins   ksm: move pages_s...
209
  /* The number of page slots additionally sharing those nodes */
b40282603   Hugh Dickins   ksm: rename kerne...
210
  static unsigned long ksm_pages_sharing;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
211

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
212
213
214
215
216
  /* The number of nodes in the unstable tree */
  static unsigned long ksm_pages_unshared;
  
  /* The number of rmap_items in use: to calculate pages_volatile */
  static unsigned long ksm_rmap_items;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
217
  /* Number of pages ksmd should scan in one batch */
2c6854fda   Izik Eidus   ksm: change defau...
218
  static unsigned int ksm_thread_pages_to_scan = 100;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
219
220
  
  /* Milliseconds ksmd should sleep between batches */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
221
  static unsigned int ksm_thread_sleep_millisecs = 20;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
222

e850dcf53   Hugh Dickins   ksm: trivial tidyups
223
  #ifdef CONFIG_NUMA
90bd6fd31   Petr Holasek   ksm: allow trees ...
224
225
  /* Zeroed when merging across nodes is not allowed */
  static unsigned int ksm_merge_across_nodes = 1;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
226
  static int ksm_nr_node_ids = 1;
e850dcf53   Hugh Dickins   ksm: trivial tidyups
227
228
  #else
  #define ksm_merge_across_nodes	1U
ef53d16cd   Hugh Dickins   ksm: allocate roo...
229
  #define ksm_nr_node_ids		1
e850dcf53   Hugh Dickins   ksm: trivial tidyups
230
  #endif
90bd6fd31   Petr Holasek   ksm: allow trees ...
231

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
232
233
234
  #define KSM_RUN_STOP	0
  #define KSM_RUN_MERGE	1
  #define KSM_RUN_UNMERGE	2
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
235
236
237
  #define KSM_RUN_OFFLINE	4
  static unsigned long ksm_run = KSM_RUN_STOP;
  static void wait_while_offlining(void);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
238
239
240
241
242
243
244
245
246
247
248
249
250
251
  
  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
  static DEFINE_MUTEX(ksm_thread_mutex);
  static DEFINE_SPINLOCK(ksm_mmlist_lock);
  
  #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
  		sizeof(struct __struct), __alignof__(struct __struct),\
  		(__flags), NULL)
  
  static int __init ksm_slab_init(void)
  {
  	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
  	if (!rmap_item_cache)
  		goto out;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
252
253
254
  	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
  	if (!stable_node_cache)
  		goto out_free1;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
255
256
  	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
  	if (!mm_slot_cache)
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
257
  		goto out_free2;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
258
259
  
  	return 0;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
260
261
262
  out_free2:
  	kmem_cache_destroy(stable_node_cache);
  out_free1:
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
263
264
265
266
267
268
269
270
  	kmem_cache_destroy(rmap_item_cache);
  out:
  	return -ENOMEM;
  }
  
  static void __init ksm_slab_free(void)
  {
  	kmem_cache_destroy(mm_slot_cache);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
271
  	kmem_cache_destroy(stable_node_cache);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
272
273
274
275
276
277
  	kmem_cache_destroy(rmap_item_cache);
  	mm_slot_cache = NULL;
  }
  
  static inline struct rmap_item *alloc_rmap_item(void)
  {
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
278
279
280
281
282
283
  	struct rmap_item *rmap_item;
  
  	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
  	if (rmap_item)
  		ksm_rmap_items++;
  	return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
284
285
286
287
  }
  
  static inline void free_rmap_item(struct rmap_item *rmap_item)
  {
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
288
  	ksm_rmap_items--;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
289
290
291
  	rmap_item->mm = NULL;	/* debug safety */
  	kmem_cache_free(rmap_item_cache, rmap_item);
  }
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
292
293
294
295
296
297
298
299
300
  static inline struct stable_node *alloc_stable_node(void)
  {
  	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
  }
  
  static inline void free_stable_node(struct stable_node *stable_node)
  {
  	kmem_cache_free(stable_node_cache, stable_node);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
301
302
303
304
305
306
307
308
309
310
311
  static inline struct mm_slot *alloc_mm_slot(void)
  {
  	if (!mm_slot_cache)	/* initialization failed */
  		return NULL;
  	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
  }
  
  static inline void free_mm_slot(struct mm_slot *mm_slot)
  {
  	kmem_cache_free(mm_slot_cache, mm_slot);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
312
313
  static struct mm_slot *get_mm_slot(struct mm_struct *mm)
  {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
314
  	struct mm_slot *slot;
b67bfe0d4   Sasha Levin   hlist: drop the n...
315
  	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
316
317
  		if (slot->mm == mm)
  			return slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
318

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
319
320
321
322
323
324
  	return NULL;
  }
  
  static void insert_to_mm_slots_hash(struct mm_struct *mm,
  				    struct mm_slot *mm_slot)
  {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
325
  	mm_slot->mm = mm;
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
326
  	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
327
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
328
  /*
a913e182a   Hugh Dickins   ksm: clean up obs...
329
330
331
332
333
334
335
336
337
338
339
340
341
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
   * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
   * a special flag: they can just back out as soon as mm_users goes to zero.
   * ksm_test_exit() is used throughout to make this test for exit: in some
   * places for correctness, in some places just to avoid unnecessary work.
   */
  static inline bool ksm_test_exit(struct mm_struct *mm)
  {
  	return atomic_read(&mm->mm_users) == 0;
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
342
343
344
345
346
347
348
349
350
351
   * We use break_ksm to break COW on a ksm page: it's a stripped down
   *
   *	if (get_user_pages(current, mm, addr, 1, 1, 1, &page, NULL) == 1)
   *		put_page(page);
   *
   * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
   * in case the application has unmapped and remapped mm,addr meanwhile.
   * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
   * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
   */
d952b7913   Hugh Dickins   ksm: fix endless ...
352
  static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
353
354
  {
  	struct page *page;
d952b7913   Hugh Dickins   ksm: fix endless ...
355
  	int ret = 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
356
357
358
  
  	do {
  		cond_resched();
5117b3b83   Hugh Dickins   mm,ksm: FOLL_MIGR...
359
  		page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION);
22eccdd7d   Dan Carpenter   ksm: check for ER...
360
  		if (IS_ERR_OR_NULL(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
361
362
363
364
365
366
367
  			break;
  		if (PageKsm(page))
  			ret = handle_mm_fault(vma->vm_mm, vma, addr,
  							FAULT_FLAG_WRITE);
  		else
  			ret = VM_FAULT_WRITE;
  		put_page(page);
d952b7913   Hugh Dickins   ksm: fix endless ...
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
  	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_OOM)));
  	/*
  	 * We must loop because handle_mm_fault() may back out if there's
  	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
  	 *
  	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
  	 * COW has been broken, even if the vma does not permit VM_WRITE;
  	 * but note that a concurrent fault might break PageKsm for us.
  	 *
  	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
  	 * backing file, which also invalidates anonymous pages: that's
  	 * okay, that truncation will have unmapped the PageKsm for us.
  	 *
  	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
  	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
  	 * current task has TIF_MEMDIE set, and will be OOM killed on return
  	 * to user; and ksmd, having no mm, would never be chosen for that.
  	 *
  	 * But if the mm is in a limited mem_cgroup, then the fault may fail
  	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
  	 * even ksmd can fail in this way - though it's usually breaking ksm
  	 * just to undo a merge it made a moment before, so unlikely to oom.
  	 *
  	 * That's a pity: we might therefore have more kernel pages allocated
  	 * than we're counting as nodes in the stable tree; but ksm_do_scan
  	 * will retry to break_cow on each pass, so should recover the page
  	 * in due course.  The important thing is to not let VM_MERGEABLE
  	 * be cleared while any such pages might remain in the area.
  	 */
  	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
398
  }
ef6942224   Bob Liu   ksm: cleanup: int...
399
400
401
402
403
404
405
406
407
408
409
410
411
  static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
  		unsigned long addr)
  {
  	struct vm_area_struct *vma;
  	if (ksm_test_exit(mm))
  		return NULL;
  	vma = find_vma(mm, addr);
  	if (!vma || vma->vm_start > addr)
  		return NULL;
  	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  		return NULL;
  	return vma;
  }
8dd3557a5   Hugh Dickins   ksm: cleanup some...
412
  static void break_cow(struct rmap_item *rmap_item)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
413
  {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
414
415
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
416
  	struct vm_area_struct *vma;
4035c07a8   Hugh Dickins   ksm: take keyhole...
417
418
419
420
  	/*
  	 * It is not an accident that whenever we want to break COW
  	 * to undo, we also need to drop a reference to the anon_vma.
  	 */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
421
  	put_anon_vma(rmap_item->anon_vma);
4035c07a8   Hugh Dickins   ksm: take keyhole...
422

81464e306   Hugh Dickins   ksm: five little ...
423
  	down_read(&mm->mmap_sem);
ef6942224   Bob Liu   ksm: cleanup: int...
424
425
426
  	vma = find_mergeable_vma(mm, addr);
  	if (vma)
  		break_ksm(vma, addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
427
428
  	up_read(&mm->mmap_sem);
  }
29ad768cf   Andrea Arcangeli   thp: KSM on THP
429
430
431
  static struct page *page_trans_compound_anon(struct page *page)
  {
  	if (PageTransCompound(page)) {
668f9abbd   David Rientjes   mm: close PageTai...
432
  		struct page *head = compound_head(page);
29ad768cf   Andrea Arcangeli   thp: KSM on THP
433
  		/*
22e5c47ee   Andrea Arcangeli   thp: add compound...
434
435
  		 * head may actually be splitted and freed from under
  		 * us but it's ok here.
29ad768cf   Andrea Arcangeli   thp: KSM on THP
436
  		 */
29ad768cf   Andrea Arcangeli   thp: KSM on THP
437
438
439
440
441
  		if (PageAnon(head))
  			return head;
  	}
  	return NULL;
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
442
443
444
445
446
447
448
449
  static struct page *get_mergeable_page(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	struct page *page;
  
  	down_read(&mm->mmap_sem);
ef6942224   Bob Liu   ksm: cleanup: int...
450
451
  	vma = find_mergeable_vma(mm, addr);
  	if (!vma)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
452
453
454
  		goto out;
  
  	page = follow_page(vma, addr, FOLL_GET);
22eccdd7d   Dan Carpenter   ksm: check for ER...
455
  	if (IS_ERR_OR_NULL(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
456
  		goto out;
29ad768cf   Andrea Arcangeli   thp: KSM on THP
457
  	if (PageAnon(page) || page_trans_compound_anon(page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
458
459
460
461
462
463
464
465
466
  		flush_anon_page(vma, page, addr);
  		flush_dcache_page(page);
  	} else {
  		put_page(page);
  out:		page = NULL;
  	}
  	up_read(&mm->mmap_sem);
  	return page;
  }
90bd6fd31   Petr Holasek   ksm: allow trees ...
467
468
469
470
471
472
473
474
  /*
   * This helper is used for getting right index into array of tree roots.
   * When merge_across_nodes knob is set to 1, there are only two rb-trees for
   * stable and unstable pages from all nodes with roots in index 0. Otherwise,
   * every node has its own stable and unstable tree.
   */
  static inline int get_kpfn_nid(unsigned long kpfn)
  {
d8fc16a82   Hugh Dickins   ksm: fix m68k bui...
475
  	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
90bd6fd31   Petr Holasek   ksm: allow trees ...
476
  }
4035c07a8   Hugh Dickins   ksm: take keyhole...
477
478
479
  static void remove_node_from_stable_tree(struct stable_node *stable_node)
  {
  	struct rmap_item *rmap_item;
4035c07a8   Hugh Dickins   ksm: take keyhole...
480

b67bfe0d4   Sasha Levin   hlist: drop the n...
481
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
482
483
484
485
  		if (rmap_item->hlist.next)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
9e60109f1   Peter Zijlstra   mm: rename drop_a...
486
  		put_anon_vma(rmap_item->anon_vma);
4035c07a8   Hugh Dickins   ksm: take keyhole...
487
488
489
  		rmap_item->address &= PAGE_MASK;
  		cond_resched();
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
490
491
492
493
  	if (stable_node->head == &migrate_nodes)
  		list_del(&stable_node->list);
  	else
  		rb_erase(&stable_node->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
494
  			 root_stable_tree + NUMA(stable_node->nid));
4035c07a8   Hugh Dickins   ksm: take keyhole...
495
496
497
498
499
500
501
502
503
  	free_stable_node(stable_node);
  }
  
  /*
   * get_ksm_page: checks if the page indicated by the stable node
   * is still its ksm page, despite having held no reference to it.
   * In which case we can trust the content of the page, and it
   * returns the gotten page; but if the page has now been zapped,
   * remove the stale node from the stable tree and return NULL.
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
504
   * But beware, the stable node's page might be being migrated.
4035c07a8   Hugh Dickins   ksm: take keyhole...
505
506
507
508
509
510
511
512
513
514
   *
   * You would expect the stable_node to hold a reference to the ksm page.
   * But if it increments the page's count, swapping out has to wait for
   * ksmd to come around again before it can free the page, which may take
   * seconds or even minutes: much too unresponsive.  So instead we use a
   * "keyhole reference": access to the ksm page from the stable node peeps
   * out through its keyhole to see if that page still holds the right key,
   * pointing back to this stable node.  This relies on freeing a PageAnon
   * page to reset its page->mapping to NULL, and relies on no other use of
   * a page to put something that might look like our key in page->mapping.
4035c07a8   Hugh Dickins   ksm: take keyhole...
515
516
   * is on its way to being freed; but it is an anomaly to bear in mind.
   */
8fdb3dbf0   Hugh Dickins   ksm: add some com...
517
  static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
4035c07a8   Hugh Dickins   ksm: take keyhole...
518
519
520
  {
  	struct page *page;
  	void *expected_mapping;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
521
  	unsigned long kpfn;
4035c07a8   Hugh Dickins   ksm: take keyhole...
522

4035c07a8   Hugh Dickins   ksm: take keyhole...
523
524
  	expected_mapping = (void *)stable_node +
  				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
525
526
527
528
529
530
531
532
533
534
535
  again:
  	kpfn = ACCESS_ONCE(stable_node->kpfn);
  	page = pfn_to_page(kpfn);
  
  	/*
  	 * page is computed from kpfn, so on most architectures reading
  	 * page->mapping is naturally ordered after reading node->kpfn,
  	 * but on Alpha we need to be more careful.
  	 */
  	smp_read_barrier_depends();
  	if (ACCESS_ONCE(page->mapping) != expected_mapping)
4035c07a8   Hugh Dickins   ksm: take keyhole...
536
  		goto stale;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
  
  	/*
  	 * We cannot do anything with the page while its refcount is 0.
  	 * Usually 0 means free, or tail of a higher-order page: in which
  	 * case this node is no longer referenced, and should be freed;
  	 * however, it might mean that the page is under page_freeze_refs().
  	 * The __remove_mapping() case is easy, again the node is now stale;
  	 * but if page is swapcache in migrate_page_move_mapping(), it might
  	 * still be our page, in which case it's essential to keep the node.
  	 */
  	while (!get_page_unless_zero(page)) {
  		/*
  		 * Another check for page->mapping != expected_mapping would
  		 * work here too.  We have chosen the !PageSwapCache test to
  		 * optimize the common case, when the page is or is about to
  		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
  		 * in the freeze_refs section of __remove_mapping(); but Anon
  		 * page->mapping reset to NULL later, in free_pages_prepare().
  		 */
  		if (!PageSwapCache(page))
  			goto stale;
  		cpu_relax();
  	}
  
  	if (ACCESS_ONCE(page->mapping) != expected_mapping) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
562
563
564
  		put_page(page);
  		goto stale;
  	}
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
565

8fdb3dbf0   Hugh Dickins   ksm: add some com...
566
  	if (lock_it) {
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
567
  		lock_page(page);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
568
  		if (ACCESS_ONCE(page->mapping) != expected_mapping) {
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
569
570
571
572
573
  			unlock_page(page);
  			put_page(page);
  			goto stale;
  		}
  	}
4035c07a8   Hugh Dickins   ksm: take keyhole...
574
  	return page;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
575

4035c07a8   Hugh Dickins   ksm: take keyhole...
576
  stale:
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
577
578
579
580
581
582
583
584
585
  	/*
  	 * We come here from above when page->mapping or !PageSwapCache
  	 * suggests that the node is stale; but it might be under migration.
  	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
  	 * before checking whether node->kpfn has been changed.
  	 */
  	smp_rmb();
  	if (ACCESS_ONCE(stable_node->kpfn) != kpfn)
  		goto again;
4035c07a8   Hugh Dickins   ksm: take keyhole...
586
587
588
  	remove_node_from_stable_tree(stable_node);
  	return NULL;
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
589
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
590
591
592
593
594
   * Removing rmap_item from stable or unstable tree.
   * This function will clean the information from the stable/unstable tree.
   */
  static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
  {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
595
596
  	if (rmap_item->address & STABLE_FLAG) {
  		struct stable_node *stable_node;
5ad646880   Hugh Dickins   ksm: let shared p...
597
  		struct page *page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
598

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
599
  		stable_node = rmap_item->head;
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
600
  		page = get_ksm_page(stable_node, true);
4035c07a8   Hugh Dickins   ksm: take keyhole...
601
602
  		if (!page)
  			goto out;
5ad646880   Hugh Dickins   ksm: let shared p...
603

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
604
  		hlist_del(&rmap_item->hlist);
4035c07a8   Hugh Dickins   ksm: take keyhole...
605
606
  		unlock_page(page);
  		put_page(page);
08beca44d   Hugh Dickins   ksm: stable_node ...
607

4035c07a8   Hugh Dickins   ksm: take keyhole...
608
609
610
  		if (stable_node->hlist.first)
  			ksm_pages_sharing--;
  		else
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
611
  			ksm_pages_shared--;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
612

9e60109f1   Peter Zijlstra   mm: rename drop_a...
613
  		put_anon_vma(rmap_item->anon_vma);
93d17715a   Hugh Dickins   ksm: three remove...
614
  		rmap_item->address &= PAGE_MASK;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
615

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
616
  	} else if (rmap_item->address & UNSTABLE_FLAG) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
617
618
  		unsigned char age;
  		/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
619
  		 * Usually ksmd can and must skip the rb_erase, because
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
620
  		 * root_unstable_tree was already reset to RB_ROOT.
9ba692948   Hugh Dickins   ksm: fix oom dead...
621
622
623
  		 * But be careful when an mm is exiting: do the rb_erase
  		 * if this rmap_item was inserted by this scan, rather
  		 * than left over from before.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
624
625
  		 */
  		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
cd551f975   Hugh Dickins   ksm: distribute r...
626
  		BUG_ON(age > 1);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
627
  		if (!age)
90bd6fd31   Petr Holasek   ksm: allow trees ...
628
  			rb_erase(&rmap_item->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
629
  				 root_unstable_tree + NUMA(rmap_item->nid));
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
630
  		ksm_pages_unshared--;
93d17715a   Hugh Dickins   ksm: three remove...
631
  		rmap_item->address &= PAGE_MASK;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
632
  	}
4035c07a8   Hugh Dickins   ksm: take keyhole...
633
  out:
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
634
635
  	cond_resched();		/* we're called from many long loops */
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
636
  static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
637
  				       struct rmap_item **rmap_list)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
638
  {
6514d511d   Hugh Dickins   ksm: singly-linke...
639
640
641
  	while (*rmap_list) {
  		struct rmap_item *rmap_item = *rmap_list;
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
642
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
643
644
645
646
647
  		free_rmap_item(rmap_item);
  	}
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
648
   * Though it's very tempting to unmerge rmap_items from stable tree rather
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
649
650
651
652
653
   * than check every pte of a given vma, the locking doesn't quite work for
   * that - an rmap_item is assigned to the stable tree after inserting ksm
   * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
   * rmap_items from parent to child at fork time (so as not to waste time
   * if exit comes before the next scan reaches it).
81464e306   Hugh Dickins   ksm: five little ...
654
655
656
657
658
   *
   * Similarly, although we'd like to remove rmap_items (so updating counts
   * and freeing memory) when unmerging an area, it's easier to leave that
   * to the next pass of ksmd - consider, for example, how ksmd might be
   * in cmp_and_merge_page on one of the rmap_items we would be removing.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
659
   */
d952b7913   Hugh Dickins   ksm: fix endless ...
660
661
  static int unmerge_ksm_pages(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
662
663
  {
  	unsigned long addr;
d952b7913   Hugh Dickins   ksm: fix endless ...
664
  	int err = 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
665

d952b7913   Hugh Dickins   ksm: fix endless ...
666
  	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
667
668
  		if (ksm_test_exit(vma->vm_mm))
  			break;
d952b7913   Hugh Dickins   ksm: fix endless ...
669
670
671
672
673
674
  		if (signal_pending(current))
  			err = -ERESTARTSYS;
  		else
  			err = break_ksm(vma, addr);
  	}
  	return err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
675
  }
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
676
677
678
679
  #ifdef CONFIG_SYSFS
  /*
   * Only called through the sysfs control interface:
   */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
680
681
682
683
684
685
686
687
688
689
690
691
  static int remove_stable_node(struct stable_node *stable_node)
  {
  	struct page *page;
  	int err;
  
  	page = get_ksm_page(stable_node, true);
  	if (!page) {
  		/*
  		 * get_ksm_page did remove_node_from_stable_tree itself.
  		 */
  		return 0;
  	}
8fdb3dbf0   Hugh Dickins   ksm: add some com...
692
693
694
695
696
  	if (WARN_ON_ONCE(page_mapped(page))) {
  		/*
  		 * This should not happen: but if it does, just refuse to let
  		 * merge_across_nodes be switched - there is no need to panic.
  		 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
697
  		err = -EBUSY;
8fdb3dbf0   Hugh Dickins   ksm: add some com...
698
  	} else {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
699
  		/*
8fdb3dbf0   Hugh Dickins   ksm: add some com...
700
701
702
  		 * The stable node did not yet appear stale to get_ksm_page(),
  		 * since that allows for an unmapped ksm page to be recognized
  		 * right up until it is freed; but the node is safe to remove.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
  		 * This page might be in a pagevec waiting to be freed,
  		 * or it might be PageSwapCache (perhaps under writeback),
  		 * or it might have been removed from swapcache a moment ago.
  		 */
  		set_page_stable_node(page, NULL);
  		remove_node_from_stable_tree(stable_node);
  		err = 0;
  	}
  
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
  
  static int remove_all_stable_nodes(void)
  {
  	struct stable_node *stable_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
720
  	struct list_head *this, *next;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
721
722
  	int nid;
  	int err = 0;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
723
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
724
725
726
727
728
729
730
731
732
733
  		while (root_stable_tree[nid].rb_node) {
  			stable_node = rb_entry(root_stable_tree[nid].rb_node,
  						struct stable_node, node);
  			if (remove_stable_node(stable_node)) {
  				err = -EBUSY;
  				break;	/* proceed to next nid */
  			}
  			cond_resched();
  		}
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
734
735
736
737
738
739
  	list_for_each_safe(this, next, &migrate_nodes) {
  		stable_node = list_entry(this, struct stable_node, list);
  		if (remove_stable_node(stable_node))
  			err = -EBUSY;
  		cond_resched();
  	}
cbf86cfe0   Hugh Dickins   ksm: remove old s...
740
741
  	return err;
  }
d952b7913   Hugh Dickins   ksm: fix endless ...
742
  static int unmerge_and_remove_all_rmap_items(void)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
743
744
745
746
  {
  	struct mm_slot *mm_slot;
  	struct mm_struct *mm;
  	struct vm_area_struct *vma;
d952b7913   Hugh Dickins   ksm: fix endless ...
747
748
749
  	int err = 0;
  
  	spin_lock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
750
  	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
d952b7913   Hugh Dickins   ksm: fix endless ...
751
752
  						struct mm_slot, mm_list);
  	spin_unlock(&ksm_mmlist_lock);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
753

9ba692948   Hugh Dickins   ksm: fix oom dead...
754
755
  	for (mm_slot = ksm_scan.mm_slot;
  			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
756
757
758
  		mm = mm_slot->mm;
  		down_read(&mm->mmap_sem);
  		for (vma = mm->mmap; vma; vma = vma->vm_next) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
759
760
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
761
762
  			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  				continue;
d952b7913   Hugh Dickins   ksm: fix endless ...
763
764
  			err = unmerge_ksm_pages(vma,
  						vma->vm_start, vma->vm_end);
9ba692948   Hugh Dickins   ksm: fix oom dead...
765
766
  			if (err)
  				goto error;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
767
  		}
9ba692948   Hugh Dickins   ksm: fix oom dead...
768

6514d511d   Hugh Dickins   ksm: singly-linke...
769
  		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
d952b7913   Hugh Dickins   ksm: fix endless ...
770
771
  
  		spin_lock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
772
  		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
d952b7913   Hugh Dickins   ksm: fix endless ...
773
  						struct mm_slot, mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
774
  		if (ksm_test_exit(mm)) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
775
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
776
777
778
779
780
781
782
783
784
785
786
  			list_del(&mm_slot->mm_list);
  			spin_unlock(&ksm_mmlist_lock);
  
  			free_mm_slot(mm_slot);
  			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  			up_read(&mm->mmap_sem);
  			mmdrop(mm);
  		} else {
  			spin_unlock(&ksm_mmlist_lock);
  			up_read(&mm->mmap_sem);
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
787
  	}
cbf86cfe0   Hugh Dickins   ksm: remove old s...
788
789
  	/* Clean up stable nodes, but don't worry if some are still busy */
  	remove_all_stable_nodes();
d952b7913   Hugh Dickins   ksm: fix endless ...
790
  	ksm_scan.seqnr = 0;
9ba692948   Hugh Dickins   ksm: fix oom dead...
791
792
793
794
  	return 0;
  
  error:
  	up_read(&mm->mmap_sem);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
795
  	spin_lock(&ksm_mmlist_lock);
d952b7913   Hugh Dickins   ksm: fix endless ...
796
  	ksm_scan.mm_slot = &ksm_mm_head;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
797
  	spin_unlock(&ksm_mmlist_lock);
d952b7913   Hugh Dickins   ksm: fix endless ...
798
  	return err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
799
  }
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
800
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
801

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
802
803
804
  static u32 calc_checksum(struct page *page)
  {
  	u32 checksum;
9b04c5fec   Cong Wang   mm: remove the se...
805
  	void *addr = kmap_atomic(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
806
  	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
9b04c5fec   Cong Wang   mm: remove the se...
807
  	kunmap_atomic(addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
808
809
810
811
812
813
814
  	return checksum;
  }
  
  static int memcmp_pages(struct page *page1, struct page *page2)
  {
  	char *addr1, *addr2;
  	int ret;
9b04c5fec   Cong Wang   mm: remove the se...
815
816
  	addr1 = kmap_atomic(page1);
  	addr2 = kmap_atomic(page2);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
817
  	ret = memcmp(addr1, addr2, PAGE_SIZE);
9b04c5fec   Cong Wang   mm: remove the se...
818
819
  	kunmap_atomic(addr2);
  	kunmap_atomic(addr1);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
  	return ret;
  }
  
  static inline int pages_identical(struct page *page1, struct page *page2)
  {
  	return !memcmp_pages(page1, page2);
  }
  
  static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  			      pte_t *orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long addr;
  	pte_t *ptep;
  	spinlock_t *ptl;
  	int swapped;
  	int err = -EFAULT;
6bdb913f0   Haggai Eran   mm: wrap calls to...
837
838
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
839
840
841
842
  
  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		goto out;
29ad768cf   Andrea Arcangeli   thp: KSM on THP
843
  	BUG_ON(PageTransCompound(page));
6bdb913f0   Haggai Eran   mm: wrap calls to...
844
845
846
847
  
  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
848
849
  	ptep = page_check_address(page, mm, addr, &ptl, 0);
  	if (!ptep)
6bdb913f0   Haggai Eran   mm: wrap calls to...
850
  		goto out_mn;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
851

4e31635c3   Hugh Dickins   ksm: fix bad user...
852
  	if (pte_write(*ptep) || pte_dirty(*ptep)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
853
854
855
856
857
  		pte_t entry;
  
  		swapped = PageSwapCache(page);
  		flush_cache_page(vma, addr, page_to_pfn(page));
  		/*
25985edce   Lucas De Marchi   Fix common misspe...
858
  		 * Ok this is tricky, when get_user_pages_fast() run it doesn't
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
859
860
861
862
863
864
865
866
867
868
869
870
  		 * take any lock, therefore the check that we are going to make
  		 * with the pagecount against the mapcount is racey and
  		 * O_DIRECT can happen right after the check.
  		 * So we clear the pte and flush the tlb before the check
  		 * this assure us that no O_DIRECT can happen after the check
  		 * or in the middle of the check.
  		 */
  		entry = ptep_clear_flush(vma, addr, ptep);
  		/*
  		 * Check that no O_DIRECT or similar I/O is in progress on the
  		 * page
  		 */
31e855ea7   Hugh Dickins   ksm: remove redun...
871
  		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
cb5323751   Robin Holt   mm/ksm.c is doing...
872
  			set_pte_at(mm, addr, ptep, entry);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
873
874
  			goto out_unlock;
  		}
4e31635c3   Hugh Dickins   ksm: fix bad user...
875
876
877
  		if (pte_dirty(entry))
  			set_page_dirty(page);
  		entry = pte_mkclean(pte_wrprotect(entry));
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
878
879
880
881
882
883
884
  		set_pte_at_notify(mm, addr, ptep, entry);
  	}
  	*orig_pte = *ptep;
  	err = 0;
  
  out_unlock:
  	pte_unmap_unlock(ptep, ptl);
6bdb913f0   Haggai Eran   mm: wrap calls to...
885
886
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
887
888
889
890
891
892
  out:
  	return err;
  }
  
  /**
   * replace_page - replace page in vma by new ksm page
8dd3557a5   Hugh Dickins   ksm: cleanup some...
893
894
895
   * @vma:      vma that holds the pte pointing to page
   * @page:     the page we are replacing by kpage
   * @kpage:    the ksm page we replace page by
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
896
897
898
899
   * @orig_pte: the original value of the pte
   *
   * Returns 0 on success, -EFAULT on failure.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
900
901
  static int replace_page(struct vm_area_struct *vma, struct page *page,
  			struct page *kpage, pte_t orig_pte)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
902
903
  {
  	struct mm_struct *mm = vma->vm_mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
904
905
906
907
  	pmd_t *pmd;
  	pte_t *ptep;
  	spinlock_t *ptl;
  	unsigned long addr;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
908
  	int err = -EFAULT;
6bdb913f0   Haggai Eran   mm: wrap calls to...
909
910
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
911

8dd3557a5   Hugh Dickins   ksm: cleanup some...
912
  	addr = page_address_in_vma(page, vma);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
913
914
  	if (addr == -EFAULT)
  		goto out;
6219049ae   Bob Liu   mm: introduce mm_...
915
916
  	pmd = mm_find_pmd(mm, addr);
  	if (!pmd)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
917
  		goto out;
29ad768cf   Andrea Arcangeli   thp: KSM on THP
918
  	BUG_ON(pmd_trans_huge(*pmd));
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
919

6bdb913f0   Haggai Eran   mm: wrap calls to...
920
921
922
  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
923
924
925
  	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
  	if (!pte_same(*ptep, orig_pte)) {
  		pte_unmap_unlock(ptep, ptl);
6bdb913f0   Haggai Eran   mm: wrap calls to...
926
  		goto out_mn;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
927
  	}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
928
  	get_page(kpage);
5ad646880   Hugh Dickins   ksm: let shared p...
929
  	page_add_anon_rmap(kpage, vma, addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
930
931
932
  
  	flush_cache_page(vma, addr, pte_pfn(*ptep));
  	ptep_clear_flush(vma, addr, ptep);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
933
  	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
934

8dd3557a5   Hugh Dickins   ksm: cleanup some...
935
  	page_remove_rmap(page);
ae52a2adb   Hugh Dickins   thp: ksm: free sw...
936
937
  	if (!page_mapped(page))
  		try_to_free_swap(page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
938
  	put_page(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
939
940
941
  
  	pte_unmap_unlock(ptep, ptl);
  	err = 0;
6bdb913f0   Haggai Eran   mm: wrap calls to...
942
943
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
944
945
946
  out:
  	return err;
  }
29ad768cf   Andrea Arcangeli   thp: KSM on THP
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
  static int page_trans_compound_anon_split(struct page *page)
  {
  	int ret = 0;
  	struct page *transhuge_head = page_trans_compound_anon(page);
  	if (transhuge_head) {
  		/* Get the reference on the head to split it. */
  		if (get_page_unless_zero(transhuge_head)) {
  			/*
  			 * Recheck we got the reference while the head
  			 * was still anonymous.
  			 */
  			if (PageAnon(transhuge_head))
  				ret = split_huge_page(transhuge_head);
  			else
  				/*
  				 * Retry later if split_huge_page run
  				 * from under us.
  				 */
  				ret = 1;
  			put_page(transhuge_head);
  		} else
  			/* Retry later if split_huge_page run from under us. */
  			ret = 1;
  	}
  	return ret;
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
973
974
  /*
   * try_to_merge_one_page - take two pages and merge them into one
8dd3557a5   Hugh Dickins   ksm: cleanup some...
975
976
   * @vma: the vma that holds the pte pointing to page
   * @page: the PageAnon page that we want to replace with kpage
80e148226   Hugh Dickins   ksm: share anon p...
977
978
   * @kpage: the PageKsm page that we want to map instead of page,
   *         or NULL the first time when we want to use page as kpage.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
979
980
981
982
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_one_page(struct vm_area_struct *vma,
8dd3557a5   Hugh Dickins   ksm: cleanup some...
983
  				 struct page *page, struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
984
985
986
  {
  	pte_t orig_pte = __pte(0);
  	int err = -EFAULT;
db114b83a   Hugh Dickins   ksm: hold anon_vm...
987
988
  	if (page == kpage)			/* ksm page forked */
  		return 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
989
990
  	if (!(vma->vm_flags & VM_MERGEABLE))
  		goto out;
29ad768cf   Andrea Arcangeli   thp: KSM on THP
991
992
993
  	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
  		goto out;
  	BUG_ON(PageTransCompound(page));
8dd3557a5   Hugh Dickins   ksm: cleanup some...
994
  	if (!PageAnon(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
995
  		goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
996
997
998
999
1000
1001
1002
  	/*
  	 * We need the page lock to read a stable PageSwapCache in
  	 * write_protect_page().  We use trylock_page() instead of
  	 * lock_page() because we don't want to wait here - we
  	 * prefer to continue scanning and merging different pages,
  	 * then come back to this page when it is unlocked.
  	 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1003
  	if (!trylock_page(page))
31e855ea7   Hugh Dickins   ksm: remove redun...
1004
  		goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1005
1006
1007
1008
1009
1010
  	/*
  	 * If this anonymous page is mapped only here, its pte may need
  	 * to be write-protected.  If it's mapped elsewhere, all of its
  	 * ptes are necessarily already write-protected.  But in either
  	 * case, we need to lock and check page_count is not raised.
  	 */
80e148226   Hugh Dickins   ksm: share anon p...
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
  	if (write_protect_page(vma, page, &orig_pte) == 0) {
  		if (!kpage) {
  			/*
  			 * While we hold page lock, upgrade page from
  			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
  			 * stable_tree_insert() will update stable_node.
  			 */
  			set_page_stable_node(page, NULL);
  			mark_page_accessed(page);
  			err = 0;
  		} else if (pages_identical(page, kpage))
  			err = replace_page(vma, page, kpage, orig_pte);
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1024

80e148226   Hugh Dickins   ksm: share anon p...
1025
  	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
73848b468   Hugh Dickins   ksm: fix mlockfre...
1026
  		munlock_vma_page(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1027
1028
  		if (!PageMlocked(kpage)) {
  			unlock_page(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1029
1030
1031
1032
1033
  			lock_page(kpage);
  			mlock_vma_page(kpage);
  			page = kpage;		/* for final unlock */
  		}
  	}
73848b468   Hugh Dickins   ksm: fix mlockfre...
1034

8dd3557a5   Hugh Dickins   ksm: cleanup some...
1035
  	unlock_page(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1036
1037
1038
1039
1040
  out:
  	return err;
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1041
1042
   * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
   * but no new kernel page is allocated: kpage must already be a ksm page.
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1043
1044
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
81464e306   Hugh Dickins   ksm: five little ...
1045
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1046
1047
  static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
  				      struct page *page, struct page *kpage)
81464e306   Hugh Dickins   ksm: five little ...
1048
  {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1049
  	struct mm_struct *mm = rmap_item->mm;
81464e306   Hugh Dickins   ksm: five little ...
1050
1051
  	struct vm_area_struct *vma;
  	int err = -EFAULT;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1052
1053
  	down_read(&mm->mmap_sem);
  	if (ksm_test_exit(mm))
9ba692948   Hugh Dickins   ksm: fix oom dead...
1054
  		goto out;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1055
1056
  	vma = find_vma(mm, rmap_item->address);
  	if (!vma || vma->vm_start > rmap_item->address)
81464e306   Hugh Dickins   ksm: five little ...
1057
  		goto out;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1058
  	err = try_to_merge_one_page(vma, page, kpage);
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1059
1060
  	if (err)
  		goto out;
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
1061
1062
  	/* Unstable nid is in union with stable anon_vma: remove first */
  	remove_rmap_item_from_tree(rmap_item);
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1063
  	/* Must get reference to anon_vma while still holding mmap_sem */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
1064
1065
  	rmap_item->anon_vma = vma->anon_vma;
  	get_anon_vma(vma->anon_vma);
81464e306   Hugh Dickins   ksm: five little ...
1066
  out:
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1067
  	up_read(&mm->mmap_sem);
81464e306   Hugh Dickins   ksm: five little ...
1068
1069
1070
1071
  	return err;
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1072
1073
1074
   * try_to_merge_two_pages - take two identical pages and prepare them
   * to be merged into one page.
   *
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1075
1076
   * This function returns the kpage if we successfully merged two identical
   * pages into one ksm page, NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1077
   *
80e148226   Hugh Dickins   ksm: share anon p...
1078
   * Note that this function upgrades page to ksm page: if one of the pages
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1079
1080
   * is already a ksm page, try_to_merge_with_ksm_page should be used.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1081
1082
1083
1084
  static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  					   struct page *page,
  					   struct rmap_item *tree_rmap_item,
  					   struct page *tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1085
  {
80e148226   Hugh Dickins   ksm: share anon p...
1086
  	int err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1087

80e148226   Hugh Dickins   ksm: share anon p...
1088
  	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1089
  	if (!err) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1090
  		err = try_to_merge_with_ksm_page(tree_rmap_item,
80e148226   Hugh Dickins   ksm: share anon p...
1091
  							tree_page, page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1092
  		/*
81464e306   Hugh Dickins   ksm: five little ...
1093
1094
  		 * If that fails, we have a ksm page with only one pte
  		 * pointing to it: so break it.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1095
  		 */
4035c07a8   Hugh Dickins   ksm: take keyhole...
1096
  		if (err)
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1097
  			break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1098
  	}
80e148226   Hugh Dickins   ksm: share anon p...
1099
  	return err ? NULL : page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1100
1101
1102
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1103
   * stable_tree_search - search for page inside the stable tree
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1104
1105
1106
1107
   *
   * This function checks if there is a page inside the stable tree
   * with identical content to the page that we are scanning right now.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1108
   * This function returns the stable tree node of identical content if found,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1109
1110
   * NULL otherwise.
   */
62b61f611   Hugh Dickins   ksm: memory hotre...
1111
  static struct page *stable_tree_search(struct page *page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1112
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1113
  	int nid;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1114
  	struct rb_root *root;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1115
1116
1117
1118
  	struct rb_node **new;
  	struct rb_node *parent;
  	struct stable_node *stable_node;
  	struct stable_node *page_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1119

4146d2d67   Hugh Dickins   ksm: make !merge_...
1120
1121
1122
  	page_node = page_stable_node(page);
  	if (page_node && page_node->head != &migrate_nodes) {
  		/* ksm page forked */
08beca44d   Hugh Dickins   ksm: stable_node ...
1123
  		get_page(page);
62b61f611   Hugh Dickins   ksm: memory hotre...
1124
  		return page;
08beca44d   Hugh Dickins   ksm: stable_node ...
1125
  	}
90bd6fd31   Petr Holasek   ksm: allow trees ...
1126
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1127
  	root = root_stable_tree + nid;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1128
  again:
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1129
  	new = &root->rb_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1130
  	parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1131

4146d2d67   Hugh Dickins   ksm: make !merge_...
1132
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1133
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1134
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1135
  		cond_resched();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1136
  		stable_node = rb_entry(*new, struct stable_node, node);
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
1137
  		tree_page = get_ksm_page(stable_node, false);
4035c07a8   Hugh Dickins   ksm: take keyhole...
1138
1139
  		if (!tree_page)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1140

4035c07a8   Hugh Dickins   ksm: take keyhole...
1141
  		ret = memcmp_pages(page, tree_page);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1142
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1143

4146d2d67   Hugh Dickins   ksm: make !merge_...
1144
  		parent = *new;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1145
  		if (ret < 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1146
  			new = &parent->rb_left;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1147
  		else if (ret > 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1148
  			new = &parent->rb_right;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1149
1150
1151
1152
1153
1154
1155
1156
1157
  		else {
  			/*
  			 * Lock and unlock the stable_node's page (which
  			 * might already have been migrated) so that page
  			 * migration is sure to notice its raised count.
  			 * It would be more elegant to return stable_node
  			 * than kpage, but that involves more changes.
  			 */
  			tree_page = get_ksm_page(stable_node, true);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1158
  			if (tree_page) {
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1159
  				unlock_page(tree_page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
  				if (get_kpfn_nid(stable_node->kpfn) !=
  						NUMA(stable_node->nid)) {
  					put_page(tree_page);
  					goto replace;
  				}
  				return tree_page;
  			}
  			/*
  			 * There is now a place for page_node, but the tree may
  			 * have been rebalanced, so re-evaluate parent and new.
  			 */
  			if (page_node)
  				goto again;
  			return NULL;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1174
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1175
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1176
1177
1178
1179
1180
1181
  	if (!page_node)
  		return NULL;
  
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	rb_link_node(&page_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1182
  	rb_insert_color(&page_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1183
1184
1185
1186
1187
1188
1189
  	get_page(page);
  	return page;
  
  replace:
  	if (page_node) {
  		list_del(&page_node->list);
  		DO_NUMA(page_node->nid = nid);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1190
  		rb_replace_node(&stable_node->node, &page_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1191
1192
  		get_page(page);
  	} else {
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1193
  		rb_erase(&stable_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1194
1195
1196
1197
1198
  		page = NULL;
  	}
  	stable_node->head = &migrate_nodes;
  	list_add(&stable_node->list, stable_node->head);
  	return page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1199
1200
1201
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1202
   * stable_tree_insert - insert stable tree node pointing to new ksm page
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1203
1204
   * into the stable tree.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1205
1206
   * This function returns the stable tree node just allocated on success,
   * NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1207
   */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1208
  static struct stable_node *stable_tree_insert(struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1209
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1210
1211
  	int nid;
  	unsigned long kpfn;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1212
  	struct rb_root *root;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1213
  	struct rb_node **new;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1214
  	struct rb_node *parent = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1215
  	struct stable_node *stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1216

90bd6fd31   Petr Holasek   ksm: allow trees ...
1217
1218
  	kpfn = page_to_pfn(kpage);
  	nid = get_kpfn_nid(kpfn);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1219
1220
  	root = root_stable_tree + nid;
  	new = &root->rb_node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1221

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1222
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1223
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1224
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1225
  		cond_resched();
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1226
  		stable_node = rb_entry(*new, struct stable_node, node);
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
1227
  		tree_page = get_ksm_page(stable_node, false);
4035c07a8   Hugh Dickins   ksm: take keyhole...
1228
1229
  		if (!tree_page)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1230

4035c07a8   Hugh Dickins   ksm: take keyhole...
1231
1232
  		ret = memcmp_pages(kpage, tree_page);
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
  
  		parent = *new;
  		if (ret < 0)
  			new = &parent->rb_left;
  		else if (ret > 0)
  			new = &parent->rb_right;
  		else {
  			/*
  			 * It is not a bug that stable_tree_search() didn't
  			 * find this node: because at that time our page was
  			 * not yet write-protected, so may have changed since.
  			 */
  			return NULL;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1248
1249
1250
  	stable_node = alloc_stable_node();
  	if (!stable_node)
  		return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1251

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1252
  	INIT_HLIST_HEAD(&stable_node->hlist);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1253
  	stable_node->kpfn = kpfn;
08beca44d   Hugh Dickins   ksm: stable_node ...
1254
  	set_page_stable_node(kpage, stable_node);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1255
  	DO_NUMA(stable_node->nid = nid);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1256
  	rb_link_node(&stable_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1257
  	rb_insert_color(&stable_node->node, root);
08beca44d   Hugh Dickins   ksm: stable_node ...
1258

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1259
  	return stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1260
1261
1262
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1263
1264
   * unstable_tree_search_insert - search for identical page,
   * else insert rmap_item into the unstable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
   *
   * This function searches for a page in the unstable tree identical to the
   * page currently being scanned; and if no identical page is found in the
   * tree, we insert rmap_item as a new object into the unstable tree.
   *
   * This function returns pointer to rmap_item found to be identical
   * to the currently scanned page, NULL otherwise.
   *
   * This function does both searching and inserting, because they share
   * the same walking algorithm in an rbtree.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1276
1277
1278
1279
  static
  struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
  					      struct page *page,
  					      struct page **tree_pagep)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1280
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1281
1282
  	struct rb_node **new;
  	struct rb_root *root;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1283
  	struct rb_node *parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1284
1285
1286
  	int nid;
  
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1287
  	root = root_unstable_tree + nid;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1288
  	new = &root->rb_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1289
1290
1291
  
  	while (*new) {
  		struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1292
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1293
  		int ret;
d178f27fc   Hugh Dickins   ksm: cond_resched...
1294
  		cond_resched();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1295
  		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1296
  		tree_page = get_mergeable_page(tree_rmap_item);
22eccdd7d   Dan Carpenter   ksm: check for ER...
1297
  		if (IS_ERR_OR_NULL(tree_page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1298
1299
1300
  			return NULL;
  
  		/*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1301
  		 * Don't substitute a ksm page for a forked page.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1302
  		 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1303
1304
  		if (page == tree_page) {
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1305
1306
  			return NULL;
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1307
  		ret = memcmp_pages(page, tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1308
1309
1310
  
  		parent = *new;
  		if (ret < 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1311
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1312
1313
  			new = &parent->rb_left;
  		} else if (ret > 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1314
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1315
  			new = &parent->rb_right;
b599cbdf1   Hugh Dickins   ksm: treat unstab...
1316
1317
1318
1319
1320
1321
1322
1323
1324
  		} else if (!ksm_merge_across_nodes &&
  			   page_to_nid(tree_page) != nid) {
  			/*
  			 * If tree_page has been migrated to another NUMA node,
  			 * it will be flushed out and put in the right unstable
  			 * tree next time: only merge with it when across_nodes.
  			 */
  			put_page(tree_page);
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1325
  		} else {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1326
  			*tree_pagep = tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1327
1328
1329
  			return tree_rmap_item;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1330
  	rmap_item->address |= UNSTABLE_FLAG;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1331
  	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1332
  	DO_NUMA(rmap_item->nid = nid);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1333
  	rb_link_node(&rmap_item->node, parent, new);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1334
  	rb_insert_color(&rmap_item->node, root);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1335

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
1336
  	ksm_pages_unshared++;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1337
1338
1339
1340
1341
1342
1343
1344
1345
  	return NULL;
  }
  
  /*
   * stable_tree_append - add another rmap_item to the linked list of
   * rmap_items hanging off a given node of the stable tree, all sharing
   * the same ksm page.
   */
  static void stable_tree_append(struct rmap_item *rmap_item,
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1346
  			       struct stable_node *stable_node)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1347
  {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1348
  	rmap_item->head = stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1349
  	rmap_item->address |= STABLE_FLAG;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1350
  	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
e178dfde3   Hugh Dickins   ksm: move pages_s...
1351

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1352
1353
1354
1355
  	if (rmap_item->hlist.next)
  		ksm_pages_sharing++;
  	else
  		ksm_pages_shared++;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1356
1357
1358
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1359
1360
1361
1362
   * cmp_and_merge_page - first see if page can be merged into the stable tree;
   * if not, compare checksum to previous and if it's the same, see if page can
   * be inserted into the unstable tree, or merged with a page already there and
   * both transferred to the stable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1363
1364
1365
1366
1367
1368
   *
   * @page: the page that we are searching identical page to.
   * @rmap_item: the reverse mapping into the virtual address of this page
   */
  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
  {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1369
  	struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1370
  	struct page *tree_page = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1371
  	struct stable_node *stable_node;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1372
  	struct page *kpage;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1373
1374
  	unsigned int checksum;
  	int err;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1375
1376
1377
1378
1379
  	stable_node = page_stable_node(page);
  	if (stable_node) {
  		if (stable_node->head != &migrate_nodes &&
  		    get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
  			rb_erase(&stable_node->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1380
  				 root_stable_tree + NUMA(stable_node->nid));
4146d2d67   Hugh Dickins   ksm: make !merge_...
1381
1382
1383
1384
1385
1386
1387
  			stable_node->head = &migrate_nodes;
  			list_add(&stable_node->list, stable_node->head);
  		}
  		if (stable_node->head != &migrate_nodes &&
  		    rmap_item->head == stable_node)
  			return;
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1388
1389
  
  	/* We first start with searching the page inside the stable tree */
62b61f611   Hugh Dickins   ksm: memory hotre...
1390
  	kpage = stable_tree_search(page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1391
1392
1393
1394
1395
1396
  	if (kpage == page && rmap_item->head == stable_node) {
  		put_page(kpage);
  		return;
  	}
  
  	remove_rmap_item_from_tree(rmap_item);
62b61f611   Hugh Dickins   ksm: memory hotre...
1397
  	if (kpage) {
08beca44d   Hugh Dickins   ksm: stable_node ...
1398
  		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1399
1400
1401
1402
1403
  		if (!err) {
  			/*
  			 * The page was successfully merged:
  			 * add its rmap_item to the stable tree.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1404
  			lock_page(kpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
1405
  			stable_tree_append(rmap_item, page_stable_node(kpage));
5ad646880   Hugh Dickins   ksm: let shared p...
1406
  			unlock_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1407
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1408
  		put_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1409
1410
1411
1412
  		return;
  	}
  
  	/*
4035c07a8   Hugh Dickins   ksm: take keyhole...
1413
1414
1415
1416
  	 * If the hash value of the page has changed from the last time
  	 * we calculated it, this page is changing frequently: therefore we
  	 * don't want to insert it in the unstable tree, and we don't want
  	 * to waste our time searching for something identical to it there.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1417
1418
1419
1420
1421
1422
  	 */
  	checksum = calc_checksum(page);
  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return;
  	}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1423
1424
  	tree_rmap_item =
  		unstable_tree_search_insert(rmap_item, page, &tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1425
  	if (tree_rmap_item) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1426
1427
1428
  		kpage = try_to_merge_two_pages(rmap_item, page,
  						tree_rmap_item, tree_page);
  		put_page(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1429
  		if (kpage) {
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
1430
1431
1432
1433
  			/*
  			 * The pages were successfully merged: insert new
  			 * node in the stable tree and add both rmap_items.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1434
  			lock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1435
1436
1437
1438
1439
  			stable_node = stable_tree_insert(kpage);
  			if (stable_node) {
  				stable_tree_append(tree_rmap_item, stable_node);
  				stable_tree_append(rmap_item, stable_node);
  			}
5ad646880   Hugh Dickins   ksm: let shared p...
1440
  			unlock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1441

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1442
1443
1444
1445
1446
1447
  			/*
  			 * If we fail to insert the page into the stable tree,
  			 * we will have 2 virtual addresses that are pointing
  			 * to a ksm page left outside the stable tree,
  			 * in which case we need to break_cow on both.
  			 */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1448
  			if (!stable_node) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1449
1450
  				break_cow(tree_rmap_item);
  				break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1451
1452
  			}
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1453
1454
1455
1456
  	}
  }
  
  static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
1457
  					    struct rmap_item **rmap_list,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1458
1459
1460
  					    unsigned long addr)
  {
  	struct rmap_item *rmap_item;
6514d511d   Hugh Dickins   ksm: singly-linke...
1461
1462
  	while (*rmap_list) {
  		rmap_item = *rmap_list;
93d17715a   Hugh Dickins   ksm: three remove...
1463
  		if ((rmap_item->address & PAGE_MASK) == addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1464
  			return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1465
1466
  		if (rmap_item->address > addr)
  			break;
6514d511d   Hugh Dickins   ksm: singly-linke...
1467
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1468
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1469
1470
1471
1472
1473
1474
1475
1476
  		free_rmap_item(rmap_item);
  	}
  
  	rmap_item = alloc_rmap_item();
  	if (rmap_item) {
  		/* It has already been zeroed */
  		rmap_item->mm = mm_slot->mm;
  		rmap_item->address = addr;
6514d511d   Hugh Dickins   ksm: singly-linke...
1477
1478
  		rmap_item->rmap_list = *rmap_list;
  		*rmap_list = rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
  	}
  	return rmap_item;
  }
  
  static struct rmap_item *scan_get_next_rmap_item(struct page **page)
  {
  	struct mm_struct *mm;
  	struct mm_slot *slot;
  	struct vm_area_struct *vma;
  	struct rmap_item *rmap_item;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1489
  	int nid;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1490
1491
1492
1493
1494
1495
  
  	if (list_empty(&ksm_mm_head.mm_list))
  		return NULL;
  
  	slot = ksm_scan.mm_slot;
  	if (slot == &ksm_mm_head) {
2919bfd07   Hugh Dickins   ksm: drain pageve...
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
  		/*
  		 * A number of pages can hang around indefinitely on per-cpu
  		 * pagevecs, raised page count preventing write_protect_page
  		 * from merging them.  Though it doesn't really matter much,
  		 * it is puzzling to see some stuck in pages_volatile until
  		 * other activity jostles them out, and they also prevented
  		 * LTP's KSM test from succeeding deterministically; so drain
  		 * them here (here rather than on entry to ksm_do_scan(),
  		 * so we don't IPI too often when pages_to_scan is set low).
  		 */
  		lru_add_drain_all();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
  		/*
  		 * Whereas stale stable_nodes on the stable_tree itself
  		 * get pruned in the regular course of stable_tree_search(),
  		 * those moved out to the migrate_nodes list can accumulate:
  		 * so prune them once before each full scan.
  		 */
  		if (!ksm_merge_across_nodes) {
  			struct stable_node *stable_node;
  			struct list_head *this, *next;
  			struct page *page;
  
  			list_for_each_safe(this, next, &migrate_nodes) {
  				stable_node = list_entry(this,
  						struct stable_node, list);
  				page = get_ksm_page(stable_node, false);
  				if (page)
  					put_page(page);
  				cond_resched();
  			}
  		}
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1527
  		for (nid = 0; nid < ksm_nr_node_ids; nid++)
90bd6fd31   Petr Holasek   ksm: allow trees ...
1528
  			root_unstable_tree[nid] = RB_ROOT;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1529
1530
1531
1532
1533
  
  		spin_lock(&ksm_mmlist_lock);
  		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
  		ksm_scan.mm_slot = slot;
  		spin_unlock(&ksm_mmlist_lock);
2b472611a   Hugh Dickins   ksm: fix NULL poi...
1534
1535
1536
1537
1538
1539
  		/*
  		 * Although we tested list_empty() above, a racing __ksm_exit
  		 * of the last mm on the list may have removed it since then.
  		 */
  		if (slot == &ksm_mm_head)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1540
1541
  next_mm:
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
1542
  		ksm_scan.rmap_list = &slot->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1543
1544
1545
1546
  	}
  
  	mm = slot->mm;
  	down_read(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1547
1548
1549
1550
1551
1552
  	if (ksm_test_exit(mm))
  		vma = NULL;
  	else
  		vma = find_vma(mm, ksm_scan.address);
  
  	for (; vma; vma = vma->vm_next) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1553
1554
1555
1556
1557
1558
1559
1560
  		if (!(vma->vm_flags & VM_MERGEABLE))
  			continue;
  		if (ksm_scan.address < vma->vm_start)
  			ksm_scan.address = vma->vm_start;
  		if (!vma->anon_vma)
  			ksm_scan.address = vma->vm_end;
  
  		while (ksm_scan.address < vma->vm_end) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1561
1562
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1563
  			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
21ae5b017   Andrea Arcangeli   thp: skip transhu...
1564
1565
1566
1567
1568
  			if (IS_ERR_OR_NULL(*page)) {
  				ksm_scan.address += PAGE_SIZE;
  				cond_resched();
  				continue;
  			}
29ad768cf   Andrea Arcangeli   thp: KSM on THP
1569
1570
  			if (PageAnon(*page) ||
  			    page_trans_compound_anon(*page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1571
1572
1573
  				flush_anon_page(vma, *page, ksm_scan.address);
  				flush_dcache_page(*page);
  				rmap_item = get_next_rmap_item(slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
1574
  					ksm_scan.rmap_list, ksm_scan.address);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1575
  				if (rmap_item) {
6514d511d   Hugh Dickins   ksm: singly-linke...
1576
1577
  					ksm_scan.rmap_list =
  							&rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1578
1579
1580
1581
1582
1583
  					ksm_scan.address += PAGE_SIZE;
  				} else
  					put_page(*page);
  				up_read(&mm->mmap_sem);
  				return rmap_item;
  			}
21ae5b017   Andrea Arcangeli   thp: skip transhu...
1584
  			put_page(*page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1585
1586
1587
1588
  			ksm_scan.address += PAGE_SIZE;
  			cond_resched();
  		}
  	}
9ba692948   Hugh Dickins   ksm: fix oom dead...
1589
1590
  	if (ksm_test_exit(mm)) {
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
1591
  		ksm_scan.rmap_list = &slot->rmap_list;
9ba692948   Hugh Dickins   ksm: fix oom dead...
1592
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1593
1594
1595
1596
  	/*
  	 * Nuke all the rmap_items that are above this current rmap:
  	 * because there were no VM_MERGEABLE vmas with such addresses.
  	 */
6514d511d   Hugh Dickins   ksm: singly-linke...
1597
  	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1598
1599
  
  	spin_lock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
1600
1601
1602
1603
1604
1605
1606
  	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
  						struct mm_slot, mm_list);
  	if (ksm_scan.address == 0) {
  		/*
  		 * We've completed a full scan of all vmas, holding mmap_sem
  		 * throughout, and found no VM_MERGEABLE: so do the same as
  		 * __ksm_exit does to remove this mm from all our lists now.
9ba692948   Hugh Dickins   ksm: fix oom dead...
1607
1608
1609
1610
  		 * This applies either when cleaning up after __ksm_exit
  		 * (but beware: we can reach here even before __ksm_exit),
  		 * or when all VM_MERGEABLE areas have been unmapped (and
  		 * mmap_sem then protects against race with MADV_MERGEABLE).
cd551f975   Hugh Dickins   ksm: distribute r...
1611
  		 */
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
1612
  		hash_del(&slot->link);
cd551f975   Hugh Dickins   ksm: distribute r...
1613
  		list_del(&slot->mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1614
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
1615
1616
  		free_mm_slot(slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1617
1618
1619
1620
1621
  		up_read(&mm->mmap_sem);
  		mmdrop(mm);
  	} else {
  		spin_unlock(&ksm_mmlist_lock);
  		up_read(&mm->mmap_sem);
cd551f975   Hugh Dickins   ksm: distribute r...
1622
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1623
1624
  
  	/* Repeat until we've completed scanning the whole list */
cd551f975   Hugh Dickins   ksm: distribute r...
1625
  	slot = ksm_scan.mm_slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1626
1627
  	if (slot != &ksm_mm_head)
  		goto next_mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
  	ksm_scan.seqnr++;
  	return NULL;
  }
  
  /**
   * ksm_do_scan  - the ksm scanner main worker function.
   * @scan_npages - number of pages we want to scan before we return.
   */
  static void ksm_do_scan(unsigned int scan_npages)
  {
  	struct rmap_item *rmap_item;
22eccdd7d   Dan Carpenter   ksm: check for ER...
1639
  	struct page *uninitialized_var(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1640

878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1641
  	while (scan_npages-- && likely(!freezing(current))) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1642
1643
1644
1645
  		cond_resched();
  		rmap_item = scan_get_next_rmap_item(&page);
  		if (!rmap_item)
  			return;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1646
  		cmp_and_merge_page(page, rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1647
1648
1649
  		put_page(page);
  	}
  }
6e1583842   Hugh Dickins   ksm: keep quiet w...
1650
1651
1652
1653
  static int ksmd_should_run(void)
  {
  	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1654
1655
  static int ksm_scan_thread(void *nothing)
  {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1656
  	set_freezable();
339aa6246   Izik Eidus   ksm: change ksm n...
1657
  	set_user_nice(current, 5);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1658
1659
  
  	while (!kthread_should_stop()) {
6e1583842   Hugh Dickins   ksm: keep quiet w...
1660
  		mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1661
  		wait_while_offlining();
6e1583842   Hugh Dickins   ksm: keep quiet w...
1662
  		if (ksmd_should_run())
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1663
  			ksm_do_scan(ksm_thread_pages_to_scan);
6e1583842   Hugh Dickins   ksm: keep quiet w...
1664
  		mutex_unlock(&ksm_thread_mutex);
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1665
  		try_to_freeze();
6e1583842   Hugh Dickins   ksm: keep quiet w...
1666
  		if (ksmd_should_run()) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1667
1668
1669
  			schedule_timeout_interruptible(
  				msecs_to_jiffies(ksm_thread_sleep_millisecs));
  		} else {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1670
  			wait_event_freezable(ksm_thread_wait,
6e1583842   Hugh Dickins   ksm: keep quiet w...
1671
  				ksmd_should_run() || kthread_should_stop());
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1672
1673
1674
1675
  		}
  	}
  	return 0;
  }
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1676
1677
1678
1679
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  		unsigned long end, int advice, unsigned long *vm_flags)
  {
  	struct mm_struct *mm = vma->vm_mm;
d952b7913   Hugh Dickins   ksm: fix endless ...
1680
  	int err;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1681
1682
1683
1684
1685
1686
1687
1688
  
  	switch (advice) {
  	case MADV_MERGEABLE:
  		/*
  		 * Be somewhat over-protective for now!
  		 */
  		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
  				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
314e51b98   Konstantin Khlebnikov   mm: kill vma flag...
1689
  				 VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP))
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1690
  			return 0;		/* just ignore the advice */
cc2383ec0   Konstantin Khlebnikov   mm: introduce arc...
1691
1692
1693
1694
  #ifdef VM_SAO
  		if (*vm_flags & VM_SAO)
  			return 0;
  #endif
d952b7913   Hugh Dickins   ksm: fix endless ...
1695
1696
1697
1698
1699
  		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
  			err = __ksm_enter(mm);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1700
1701
1702
1703
1704
1705
1706
  
  		*vm_flags |= VM_MERGEABLE;
  		break;
  
  	case MADV_UNMERGEABLE:
  		if (!(*vm_flags & VM_MERGEABLE))
  			return 0;		/* just ignore the advice */
d952b7913   Hugh Dickins   ksm: fix endless ...
1707
1708
1709
1710
1711
  		if (vma->anon_vma) {
  			err = unmerge_ksm_pages(vma, start, end);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
  
  		*vm_flags &= ~VM_MERGEABLE;
  		break;
  	}
  
  	return 0;
  }
  
  int __ksm_enter(struct mm_struct *mm)
  {
6e1583842   Hugh Dickins   ksm: keep quiet w...
1722
1723
1724
1725
  	struct mm_slot *mm_slot;
  	int needs_wakeup;
  
  	mm_slot = alloc_mm_slot();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1726
1727
  	if (!mm_slot)
  		return -ENOMEM;
6e1583842   Hugh Dickins   ksm: keep quiet w...
1728
1729
  	/* Check ksm_run too?  Would need tighter locking */
  	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1730
1731
1732
  	spin_lock(&ksm_mmlist_lock);
  	insert_to_mm_slots_hash(mm, mm_slot);
  	/*
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1733
1734
  	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
  	 * insert just behind the scanning cursor, to let the area settle
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1735
1736
  	 * down a little; when fork is followed by immediate exec, we don't
  	 * want ksmd to waste time setting up and tearing down an rmap_list.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1737
1738
1739
1740
  	 *
  	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
  	 * scanning cursor, otherwise KSM pages in newly forked mms will be
  	 * missed: then we might as well insert at the end of the list.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1741
  	 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1742
1743
1744
1745
  	if (ksm_run & KSM_RUN_UNMERGE)
  		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
  	else
  		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1746
  	spin_unlock(&ksm_mmlist_lock);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1747
  	set_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1748
  	atomic_inc(&mm->mm_count);
6e1583842   Hugh Dickins   ksm: keep quiet w...
1749
1750
1751
  
  	if (needs_wakeup)
  		wake_up_interruptible(&ksm_thread_wait);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1752
1753
  	return 0;
  }
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
1754
  void __ksm_exit(struct mm_struct *mm)
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1755
  {
cd551f975   Hugh Dickins   ksm: distribute r...
1756
  	struct mm_slot *mm_slot;
9ba692948   Hugh Dickins   ksm: fix oom dead...
1757
  	int easy_to_free = 0;
cd551f975   Hugh Dickins   ksm: distribute r...
1758

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1759
  	/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
1760
1761
1762
1763
1764
1765
  	 * This process is exiting: if it's straightforward (as is the
  	 * case when ksmd was never running), free mm_slot immediately.
  	 * But if it's at the cursor or has rmap_items linked to it, use
  	 * mmap_sem to synchronize with any break_cows before pagetables
  	 * are freed, and leave the mm_slot on the list for ksmd to free.
  	 * Beware: ksm may already have noticed it exiting and freed the slot.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1766
  	 */
9ba692948   Hugh Dickins   ksm: fix oom dead...
1767

cd551f975   Hugh Dickins   ksm: distribute r...
1768
1769
  	spin_lock(&ksm_mmlist_lock);
  	mm_slot = get_mm_slot(mm);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1770
  	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
6514d511d   Hugh Dickins   ksm: singly-linke...
1771
  		if (!mm_slot->rmap_list) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
1772
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1773
1774
1775
1776
1777
1778
  			list_del(&mm_slot->mm_list);
  			easy_to_free = 1;
  		} else {
  			list_move(&mm_slot->mm_list,
  				  &ksm_scan.mm_slot->mm_list);
  		}
cd551f975   Hugh Dickins   ksm: distribute r...
1779
  	}
cd551f975   Hugh Dickins   ksm: distribute r...
1780
  	spin_unlock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1781
1782
1783
1784
1785
  	if (easy_to_free) {
  		free_mm_slot(mm_slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		mmdrop(mm);
  	} else if (mm_slot) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1786
1787
  		down_write(&mm->mmap_sem);
  		up_write(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1788
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1789
  }
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1790
  struct page *ksm_might_need_to_copy(struct page *page,
5ad646880   Hugh Dickins   ksm: let shared p...
1791
1792
  			struct vm_area_struct *vma, unsigned long address)
  {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1793
  	struct anon_vma *anon_vma = page_anon_vma(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1794
  	struct page *new_page;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
  	if (PageKsm(page)) {
  		if (page_stable_node(page) &&
  		    !(ksm_run & KSM_RUN_UNMERGE))
  			return page;	/* no need to copy it */
  	} else if (!anon_vma) {
  		return page;		/* no need to copy it */
  	} else if (anon_vma->root == vma->anon_vma->root &&
  		 page->index == linear_page_index(vma, address)) {
  		return page;		/* still no need to copy it */
  	}
  	if (!PageUptodate(page))
  		return page;		/* let do_swap_page report the error */
5ad646880   Hugh Dickins   ksm: let shared p...
1807
1808
1809
1810
1811
1812
  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
  	if (new_page) {
  		copy_user_highpage(new_page, page, address, vma);
  
  		SetPageDirty(new_page);
  		__SetPageUptodate(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
1813
  		__set_page_locked(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
1814
  	}
5ad646880   Hugh Dickins   ksm: let shared p...
1815
1816
  	return new_page;
  }
051ac83ad   Joonsoo Kim   mm/rmap: make rma...
1817
  int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1818
1819
  {
  	struct stable_node *stable_node;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1820
1821
1822
  	struct rmap_item *rmap_item;
  	int ret = SWAP_AGAIN;
  	int search_new_forks = 0;
309381fea   Sasha Levin   mm: dump page whe...
1823
  	VM_BUG_ON_PAGE(!PageKsm(page), page);
9f32624be   Joonsoo Kim   mm/rmap: use rmap...
1824
1825
1826
1827
1828
  
  	/*
  	 * Rely on the page lock to protect against concurrent modifications
  	 * to that page's node of the stable tree.
  	 */
309381fea   Sasha Levin   mm: dump page whe...
1829
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1830
1831
1832
1833
1834
  
  	stable_node = page_stable_node(page);
  	if (!stable_node)
  		return ret;
  again:
b67bfe0d4   Sasha Levin   hlist: drop the n...
1835
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1836
  		struct anon_vma *anon_vma = rmap_item->anon_vma;
5beb49305   Rik van Riel   mm: change anon_v...
1837
  		struct anon_vma_chain *vmac;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1838
  		struct vm_area_struct *vma;
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1839
  		anon_vma_lock_read(anon_vma);
bf181b9f9   Michel Lespinasse   mm anon rmap: rep...
1840
1841
  		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
  					       0, ULONG_MAX) {
5beb49305   Rik van Riel   mm: change anon_v...
1842
  			vma = vmac->vma;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
  			if (rmap_item->address < vma->vm_start ||
  			    rmap_item->address >= vma->vm_end)
  				continue;
  			/*
  			 * Initially we examine only the vma which covers this
  			 * rmap_item; but later, if there is still work to do,
  			 * we examine covering vmas in other mms: in case they
  			 * were forked from the original since ksmd passed.
  			 */
  			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
  				continue;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1854
1855
  			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  				continue;
051ac83ad   Joonsoo Kim   mm/rmap: make rma...
1856
1857
  			ret = rwc->rmap_one(page, vma,
  					rmap_item->address, rwc->arg);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1858
  			if (ret != SWAP_AGAIN) {
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1859
  				anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1860
1861
  				goto out;
  			}
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1862
1863
1864
1865
  			if (rwc->done && rwc->done(page)) {
  				anon_vma_unlock_read(anon_vma);
  				goto out;
  			}
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1866
  		}
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1867
  		anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1868
1869
1870
1871
1872
1873
  	}
  	if (!search_new_forks++)
  		goto again;
  out:
  	return ret;
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
1874
  #ifdef CONFIG_MIGRATION
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1875
1876
1877
  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
  {
  	struct stable_node *stable_node;
309381fea   Sasha Levin   mm: dump page whe...
1878
1879
1880
  	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
  	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
  	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1881
1882
1883
  
  	stable_node = page_stable_node(newpage);
  	if (stable_node) {
309381fea   Sasha Levin   mm: dump page whe...
1884
  		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
1885
  		stable_node->kpfn = page_to_pfn(newpage);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1886
1887
1888
1889
1890
1891
1892
1893
  		/*
  		 * newpage->mapping was set in advance; now we need smp_wmb()
  		 * to make sure that the new stable_node->kpfn is visible
  		 * to get_ksm_page() before it can see that oldpage->mapping
  		 * has gone stale (or that PageSwapCache has been cleared).
  		 */
  		smp_wmb();
  		set_page_stable_node(oldpage, NULL);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1894
1895
1896
  	}
  }
  #endif /* CONFIG_MIGRATION */
62b61f611   Hugh Dickins   ksm: memory hotre...
1897
  #ifdef CONFIG_MEMORY_HOTREMOVE
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
  static int just_wait(void *word)
  {
  	schedule();
  	return 0;
  }
  
  static void wait_while_offlining(void)
  {
  	while (ksm_run & KSM_RUN_OFFLINE) {
  		mutex_unlock(&ksm_thread_mutex);
  		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
  				just_wait, TASK_UNINTERRUPTIBLE);
  		mutex_lock(&ksm_thread_mutex);
  	}
  }
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1913
1914
  static void ksm_check_stable_tree(unsigned long start_pfn,
  				  unsigned long end_pfn)
62b61f611   Hugh Dickins   ksm: memory hotre...
1915
  {
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1916
  	struct stable_node *stable_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1917
  	struct list_head *this, *next;
62b61f611   Hugh Dickins   ksm: memory hotre...
1918
  	struct rb_node *node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1919
  	int nid;
62b61f611   Hugh Dickins   ksm: memory hotre...
1920

ef53d16cd   Hugh Dickins   ksm: allocate roo...
1921
1922
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1923
  		while (node) {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1924
1925
  			stable_node = rb_entry(node, struct stable_node, node);
  			if (stable_node->kpfn >= start_pfn &&
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1926
1927
1928
1929
1930
1931
  			    stable_node->kpfn < end_pfn) {
  				/*
  				 * Don't get_ksm_page, page has already gone:
  				 * which is why we keep kpfn instead of page*
  				 */
  				remove_node_from_stable_tree(stable_node);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1932
  				node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1933
1934
1935
  			} else
  				node = rb_next(node);
  			cond_resched();
90bd6fd31   Petr Holasek   ksm: allow trees ...
1936
  		}
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1937
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1938
1939
1940
1941
1942
1943
1944
  	list_for_each_safe(this, next, &migrate_nodes) {
  		stable_node = list_entry(this, struct stable_node, list);
  		if (stable_node->kpfn >= start_pfn &&
  		    stable_node->kpfn < end_pfn)
  			remove_node_from_stable_tree(stable_node);
  		cond_resched();
  	}
62b61f611   Hugh Dickins   ksm: memory hotre...
1945
1946
1947
1948
1949
1950
  }
  
  static int ksm_memory_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
62b61f611   Hugh Dickins   ksm: memory hotre...
1951
1952
1953
1954
  
  	switch (action) {
  	case MEM_GOING_OFFLINE:
  		/*
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1955
1956
1957
1958
1959
  		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
  		 * and remove_all_stable_nodes() while memory is going offline:
  		 * it is unsafe for them to touch the stable tree at this time.
  		 * But unmerge_ksm_pages(), rmap lookups and other entry points
  		 * which do not need the ksm_thread_mutex are all safe.
62b61f611   Hugh Dickins   ksm: memory hotre...
1960
  		 */
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1961
1962
1963
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run |= KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
62b61f611   Hugh Dickins   ksm: memory hotre...
1964
1965
1966
1967
1968
1969
  		break;
  
  	case MEM_OFFLINE:
  		/*
  		 * Most of the work is done by page migration; but there might
  		 * be a few stable_nodes left over, still pointing to struct
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1970
1971
1972
  		 * pages which have been offlined: prune those from the tree,
  		 * otherwise get_ksm_page() might later try to access a
  		 * non-existent struct page.
62b61f611   Hugh Dickins   ksm: memory hotre...
1973
  		 */
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1974
1975
  		ksm_check_stable_tree(mn->start_pfn,
  				      mn->start_pfn + mn->nr_pages);
62b61f611   Hugh Dickins   ksm: memory hotre...
1976
1977
1978
  		/* fallthrough */
  
  	case MEM_CANCEL_OFFLINE:
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1979
1980
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run &= ~KSM_RUN_OFFLINE;
62b61f611   Hugh Dickins   ksm: memory hotre...
1981
  		mutex_unlock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1982
1983
1984
  
  		smp_mb();	/* wake_up_bit advises this */
  		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
62b61f611   Hugh Dickins   ksm: memory hotre...
1985
1986
1987
1988
  		break;
  	}
  	return NOTIFY_OK;
  }
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1989
1990
1991
1992
  #else
  static void wait_while_offlining(void)
  {
  }
62b61f611   Hugh Dickins   ksm: memory hotre...
1993
  #endif /* CONFIG_MEMORY_HOTREMOVE */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
1994
1995
1996
1997
  #ifdef CONFIG_SYSFS
  /*
   * This all compiles without CONFIG_SYSFS, but is a waste of space.
   */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
  #define KSM_ATTR_RO(_name) \
  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KSM_ATTR(_name) \
  	static struct kobj_attribute _name##_attr = \
  		__ATTR(_name, 0644, _name##_show, _name##_store)
  
  static ssize_t sleep_millisecs_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_sleep_millisecs);
  }
  
  static ssize_t sleep_millisecs_store(struct kobject *kobj,
  				     struct kobj_attribute *attr,
  				     const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;
3dbb95f78   Jingoo Han   mm: replace stric...
2017
  	err = kstrtoul(buf, 10, &msecs);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_sleep_millisecs = msecs;
  
  	return count;
  }
  KSM_ATTR(sleep_millisecs);
  
  static ssize_t pages_to_scan_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_pages_to_scan);
  }
  
  static ssize_t pages_to_scan_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long nr_pages;
3dbb95f78   Jingoo Han   mm: replace stric...
2040
  	err = kstrtoul(buf, 10, &nr_pages);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
  	if (err || nr_pages > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_pages_to_scan = nr_pages;
  
  	return count;
  }
  KSM_ATTR(pages_to_scan);
  
  static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
  			char *buf)
  {
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2053
2054
  	return sprintf(buf, "%lu
  ", ksm_run);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2055
2056
2057
2058
2059
2060
2061
  }
  
  static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
  			 const char *buf, size_t count)
  {
  	int err;
  	unsigned long flags;
3dbb95f78   Jingoo Han   mm: replace stric...
2062
  	err = kstrtoul(buf, 10, &flags);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2063
2064
2065
2066
2067
2068
2069
2070
  	if (err || flags > UINT_MAX)
  		return -EINVAL;
  	if (flags > KSM_RUN_UNMERGE)
  		return -EINVAL;
  
  	/*
  	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
  	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
d0f209f68   Hugh Dickins   ksm: remove unswa...
2071
2072
  	 * breaking COW to free the pages_shared (but leaves mm_slots
  	 * on the list for when ksmd may be set running again).
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2073
2074
2075
  	 */
  
  	mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2076
  	wait_while_offlining();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2077
2078
  	if (ksm_run != flags) {
  		ksm_run = flags;
d952b7913   Hugh Dickins   ksm: fix endless ...
2079
  		if (flags & KSM_RUN_UNMERGE) {
e1e12d2f3   David Rientjes   mm, oom: fix race...
2080
  			set_current_oom_origin();
d952b7913   Hugh Dickins   ksm: fix endless ...
2081
  			err = unmerge_and_remove_all_rmap_items();
e1e12d2f3   David Rientjes   mm, oom: fix race...
2082
  			clear_current_oom_origin();
d952b7913   Hugh Dickins   ksm: fix endless ...
2083
2084
2085
2086
2087
  			if (err) {
  				ksm_run = KSM_RUN_STOP;
  				count = err;
  			}
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2088
2089
2090
2091
2092
2093
2094
2095
2096
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	if (flags & KSM_RUN_MERGE)
  		wake_up_interruptible(&ksm_thread_wait);
  
  	return count;
  }
  KSM_ATTR(run);
90bd6fd31   Petr Holasek   ksm: allow trees ...
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
  #ifdef CONFIG_NUMA
  static ssize_t merge_across_nodes_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_merge_across_nodes);
  }
  
  static ssize_t merge_across_nodes_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long knob;
  
  	err = kstrtoul(buf, 10, &knob);
  	if (err)
  		return err;
  	if (knob > 1)
  		return -EINVAL;
  
  	mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2119
  	wait_while_offlining();
90bd6fd31   Petr Holasek   ksm: allow trees ...
2120
  	if (ksm_merge_across_nodes != knob) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2121
  		if (ksm_pages_shared || remove_all_stable_nodes())
90bd6fd31   Petr Holasek   ksm: allow trees ...
2122
  			err = -EBUSY;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2123
2124
2125
2126
2127
2128
2129
2130
2131
  		else if (root_stable_tree == one_stable_tree) {
  			struct rb_root *buf;
  			/*
  			 * This is the first time that we switch away from the
  			 * default of merging across nodes: must now allocate
  			 * a buffer to hold as many roots as may be needed.
  			 * Allocate stable and unstable together:
  			 * MAXSMP NODES_SHIFT 10 will use 16kB.
  			 */
bafe1e144   Joe Perches   ksm: remove redun...
2132
2133
  			buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf),
  				      GFP_KERNEL);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
  			/* Let us assume that RB_ROOT is NULL is zero */
  			if (!buf)
  				err = -ENOMEM;
  			else {
  				root_stable_tree = buf;
  				root_unstable_tree = buf + nr_node_ids;
  				/* Stable tree is empty but not the unstable */
  				root_unstable_tree[0] = one_unstable_tree[0];
  			}
  		}
  		if (!err) {
90bd6fd31   Petr Holasek   ksm: allow trees ...
2145
  			ksm_merge_across_nodes = knob;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2146
2147
  			ksm_nr_node_ids = knob ? 1 : nr_node_ids;
  		}
90bd6fd31   Petr Holasek   ksm: allow trees ...
2148
2149
2150
2151
2152
2153
2154
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	return err ? err : count;
  }
  KSM_ATTR(merge_across_nodes);
  #endif
b40282603   Hugh Dickins   ksm: rename kerne...
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
  static ssize_t pages_shared_show(struct kobject *kobj,
  				 struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_shared);
  }
  KSM_ATTR_RO(pages_shared);
  
  static ssize_t pages_sharing_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
e178dfde3   Hugh Dickins   ksm: move pages_s...
2166
2167
  	return sprintf(buf, "%lu
  ", ksm_pages_sharing);
b40282603   Hugh Dickins   ksm: rename kerne...
2168
2169
  }
  KSM_ATTR_RO(pages_sharing);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
  static ssize_t pages_unshared_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_unshared);
  }
  KSM_ATTR_RO(pages_unshared);
  
  static ssize_t pages_volatile_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	long ksm_pages_volatile;
  
  	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
  				- ksm_pages_sharing - ksm_pages_unshared;
  	/*
  	 * It was not worth any locking to calculate that statistic,
  	 * but it might therefore sometimes be negative: conceal that.
  	 */
  	if (ksm_pages_volatile < 0)
  		ksm_pages_volatile = 0;
  	return sprintf(buf, "%ld
  ", ksm_pages_volatile);
  }
  KSM_ATTR_RO(pages_volatile);
  
  static ssize_t full_scans_show(struct kobject *kobj,
  			       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_scan.seqnr);
  }
  KSM_ATTR_RO(full_scans);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2203
2204
2205
2206
  static struct attribute *ksm_attrs[] = {
  	&sleep_millisecs_attr.attr,
  	&pages_to_scan_attr.attr,
  	&run_attr.attr,
b40282603   Hugh Dickins   ksm: rename kerne...
2207
2208
  	&pages_shared_attr.attr,
  	&pages_sharing_attr.attr,
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
2209
2210
2211
  	&pages_unshared_attr.attr,
  	&pages_volatile_attr.attr,
  	&full_scans_attr.attr,
90bd6fd31   Petr Holasek   ksm: allow trees ...
2212
2213
2214
  #ifdef CONFIG_NUMA
  	&merge_across_nodes_attr.attr,
  #endif
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2215
2216
2217
2218
2219
2220
2221
  	NULL,
  };
  
  static struct attribute_group ksm_attr_group = {
  	.attrs = ksm_attrs,
  	.name = "ksm",
  };
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2222
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2223
2224
2225
2226
2227
2228
2229
2230
2231
  
  static int __init ksm_init(void)
  {
  	struct task_struct *ksm_thread;
  	int err;
  
  	err = ksm_slab_init();
  	if (err)
  		goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2232
2233
2234
2235
2236
  	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
  	if (IS_ERR(ksm_thread)) {
  		printk(KERN_ERR "ksm: creating kthread failed
  ");
  		err = PTR_ERR(ksm_thread);
d9f8984c2   Lai Jiangshan   ksm: cleanup for ...
2237
  		goto out_free;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2238
  	}
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2239
  #ifdef CONFIG_SYSFS
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2240
2241
2242
2243
  	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
  	if (err) {
  		printk(KERN_ERR "ksm: register sysfs failed
  ");
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2244
  		kthread_stop(ksm_thread);
d9f8984c2   Lai Jiangshan   ksm: cleanup for ...
2245
  		goto out_free;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2246
  	}
c73602ad3   Hugh Dickins   ksm: more on defa...
2247
2248
  #else
  	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2249
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2250

62b61f611   Hugh Dickins   ksm: memory hotre...
2251
  #ifdef CONFIG_MEMORY_HOTREMOVE
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2252
  	/* There is no significance to this priority 100 */
62b61f611   Hugh Dickins   ksm: memory hotre...
2253
2254
  	hotplug_memory_notifier(ksm_memory_callback, 100);
  #endif
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2255
  	return 0;
d9f8984c2   Lai Jiangshan   ksm: cleanup for ...
2256
  out_free:
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2257
2258
2259
  	ksm_slab_free();
  out:
  	return err;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2260
  }
a64fb3cd6   Paul Gortmaker   mm: audit/fix non...
2261
  subsys_initcall(ksm_init);