mm/ksm.c
  /*
   * Memory merging support.
   *
   * This code enables dynamic sharing of identical pages found in different
   * memory areas, even if they are not shared by fork()
   *
   * Copyright (C) 2008-2009 Red Hat, Inc.
   * Authors:
   *	Izik Eidus
   *	Andrea Arcangeli
   *	Chris Wright
   *	Hugh Dickins
   *
   * This work is licensed under the terms of the GNU GPL, version 2.
   */
  
  #include <linux/errno.h>
  #include <linux/mm.h>
  #include <linux/fs.h>
  #include <linux/mman.h>
  #include <linux/sched.h>
  #include <linux/rwsem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/spinlock.h>
  #include <linux/jhash.h>
  #include <linux/delay.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>
  #include <linux/slab.h>
  #include <linux/rbtree.h>
  #include <linux/memory.h>
  #include <linux/mmu_notifier.h>
  #include <linux/swap.h>
  #include <linux/ksm.h>
  #include <linux/hashtable.h>
  #include <linux/freezer.h>
  #include <linux/oom.h>
  #include <linux/numa.h>

  #include <asm/tlbflush.h>
  #include "internal.h"

  #ifdef CONFIG_NUMA
  #define NUMA(x)		(x)
  #define DO_NUMA(x)	do { (x); } while (0)
  #else
  #define NUMA(x)		(0)
  #define DO_NUMA(x)	do { } while (0)
  #endif
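  /*
   * NUMA(x) reads a NUMA-only value (constant 0 when CONFIG_NUMA is off)
   * and DO_NUMA(x) performs a NUMA-only statement (a no-op when off), so
   * the per-node bookkeeping below compiles away on !CONFIG_NUMA builds.
   */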
  /*
   * A few notes about the KSM scanning process,
   * to make it easier to understand the data structures below:
   *
   * In order to reduce excessive scanning, KSM sorts the memory pages by their
   * contents into a data structure that holds pointers to the pages' locations.
   *
   * Since the contents of the pages may change at any moment, KSM cannot just
   * insert the pages into a normal sorted tree and expect it to find anything.
   * Therefore KSM uses two data structures - the stable and the unstable tree.
   *
   * The stable tree holds pointers to all the merged pages (ksm pages), sorted
   * by their contents.  Because each such page is write-protected, searching on
   * this tree is fully assured to be working (except when pages are unmapped),
   * and therefore this tree is called the stable tree.
   *
   * In addition to the stable tree, KSM uses a second data structure called the
   * unstable tree: this tree holds pointers to pages which have been found to
   * be "unchanged for a period of time".  The unstable tree sorts these pages
   * by their contents, but since they are not write-protected, KSM cannot rely
   * upon the unstable tree to work correctly - the unstable tree is liable to
   * be corrupted as its contents are modified, and so it is called unstable.
   *
   * KSM solves this problem by several techniques:
   *
   * 1) The unstable tree is flushed every time KSM completes scanning all
   *    memory areas, and then the tree is rebuilt again from the beginning.
   * 2) KSM will only insert into the unstable tree pages whose hash value
   *    has not changed since the previous scan of all memory areas.
   * 3) The unstable tree is a red-black tree - so its balancing is based on the
   *    colors of the nodes and not on their contents, assuring that even when
   *    the tree gets "corrupted" it won't get out of balance, so scanning time
   *    remains the same (also, searching and inserting nodes in an rbtree uses
   *    the same algorithm, so we have no overhead when we flush and rebuild).
   * 4) KSM never flushes the stable tree, which means that even if it were to
   *    take 10 attempts to find a page in the unstable tree, once it is found,
   *    it is secured in the stable tree.  (When we scan a new page, we first
   *    compare it against the stable tree, and then against the unstable tree.)
   *
   * If the merge_across_nodes tunable is unset, then KSM maintains multiple
   * stable trees and multiple unstable trees: one of each for each NUMA node.
   */
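  /*
   * For reference (see Documentation/vm/ksm.txt): an application opts an
   * anonymous region into merging with madvise(MADV_MERGEABLE), and ksmd is
   * started by writing 1 to /sys/kernel/mm/ksm/run.  A minimal user-space
   * sketch:
   *
   *	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
   *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   *	madvise(buf, len, MADV_MERGEABLE);	(marks the vma VM_MERGEABLE)
   *	... fill buf with pages that duplicate each other or other tasks ...
   */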
  
  /**
   * struct mm_slot - ksm information per mm that is being scanned
   * @link: link to the mm_slots hash list
   * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
   * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
   * @mm: the mm that this information is valid for
   */
  struct mm_slot {
  	struct hlist_node link;
  	struct list_head mm_list;
  	struct rmap_item *rmap_list;
  	struct mm_struct *mm;
  };
  
  /**
   * struct ksm_scan - cursor for scanning
   * @mm_slot: the current mm_slot we are scanning
   * @address: the next address inside that to be scanned
   * @rmap_list: link to the next rmap to be scanned in the rmap_list
   * @seqnr: count of completed full scans (needed when removing unstable node)
   *
   * There is only the one ksm_scan instance of this cursor structure.
   */
  struct ksm_scan {
  	struct mm_slot *mm_slot;
  	unsigned long address;
  	struct rmap_item **rmap_list;
  	unsigned long seqnr;
  };
  
  /**
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
   * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
   * @list: linked into migrate_nodes, pending placement in the proper node tree
   * @hlist: hlist head of rmap_items using this ksm page
   * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
   * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
   */
  struct stable_node {
  	union {
  		struct rb_node node;	/* when node of stable tree */
  		struct {		/* when listed for migration */
  			struct list_head *head;
  			struct list_head list;
  		};
  	};
  	struct hlist_head hlist;
  	unsigned long kpfn;
  #ifdef CONFIG_NUMA
  	int nid;
  #endif
  };
  
  /**
   * struct rmap_item - reverse mapping item for virtual addresses
   * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
   * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
   * @nid: NUMA node id of unstable tree in which linked (may not match page)
   * @mm: the memory structure this rmap_item is pointing into
   * @address: the virtual address this rmap_item tracks (+ flags in low bits)
   * @oldchecksum: previous checksum of the page at that virtual address
   * @node: rb node of this rmap_item in the unstable tree
   * @head: pointer to stable_node heading this list in the stable tree
   * @hlist: link into hlist of rmap_items hanging off that stable_node
   */
  struct rmap_item {
  	struct rmap_item *rmap_list;
  	union {
  		struct anon_vma *anon_vma;	/* when stable */
  #ifdef CONFIG_NUMA
  		int nid;		/* when node of unstable tree */
  #endif
  	};
  	struct mm_struct *mm;
  	unsigned long address;		/* + low bits used for flags below */
  	unsigned int oldchecksum;	/* when unstable */
  	union {
  		struct rb_node node;	/* when node of unstable tree */
  		struct {		/* when listed from stable tree */
  			struct stable_node *head;
  			struct hlist_node hlist;
  		};
  	};
  };
  
  #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
  #define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
  #define STABLE_FLAG	0x200	/* is listed from the stable tree */
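  /*
   * These flags live in the low bits of rmap_item->address (page aligned):
   * the bottom byte holds the scan seqnr while the item is in the unstable
   * tree, and address & PAGE_MASK recovers the virtual address itself.
   */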
  
  /* The stable and unstable tree heads */
  static struct rb_root one_stable_tree[1] = { RB_ROOT };
  static struct rb_root one_unstable_tree[1] = { RB_ROOT };
  static struct rb_root *root_stable_tree = one_stable_tree;
  static struct rb_root *root_unstable_tree = one_unstable_tree;

  /* Recently migrated nodes of stable tree, pending proper placement */
  static LIST_HEAD(migrate_nodes);
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
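  /*
   * mm_slots_hash maps each registered mm_struct to its mm_slot, hashed on
   * the mm pointer value (1 << MM_SLOTS_HASH_BITS buckets).
   */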
  
  static struct mm_slot ksm_mm_head = {
  	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
  };
  static struct ksm_scan ksm_scan = {
  	.mm_slot = &ksm_mm_head,
  };
  
  static struct kmem_cache *rmap_item_cache;
  static struct kmem_cache *stable_node_cache;
  static struct kmem_cache *mm_slot_cache;
  
  /* The number of nodes in the stable tree */
  static unsigned long ksm_pages_shared;

  /* The number of page slots additionally sharing those nodes */
  static unsigned long ksm_pages_sharing;

  /* The number of nodes in the unstable tree */
  static unsigned long ksm_pages_unshared;
  
  /* The number of rmap_items in use: to calculate pages_volatile */
  static unsigned long ksm_rmap_items;
  /* Number of pages ksmd should scan in one batch */
  static unsigned int ksm_thread_pages_to_scan = 100;
  
  /* Milliseconds ksmd should sleep between batches */
  static unsigned int ksm_thread_sleep_millisecs = 20;

  #ifdef CONFIG_NUMA
  /* Zeroed when merging across nodes is not allowed */
  static unsigned int ksm_merge_across_nodes = 1;
  static int ksm_nr_node_ids = 1;
  #else
  #define ksm_merge_across_nodes	1U
  #define ksm_nr_node_ids		1
  #endif

  #define KSM_RUN_STOP	0
  #define KSM_RUN_MERGE	1
  #define KSM_RUN_UNMERGE	2
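  /*
   * The three values above mirror what userspace writes to
   * /sys/kernel/mm/ksm/run: 0 stops ksmd, 1 runs it, 2 unmerges everything;
   * KSM_RUN_OFFLINE below is set internally while memory hot-remove runs.
   */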
  #define KSM_RUN_OFFLINE	4
  static unsigned long ksm_run = KSM_RUN_STOP;
  static void wait_while_offlining(void);
  
  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
  static DEFINE_MUTEX(ksm_thread_mutex);
  static DEFINE_SPINLOCK(ksm_mmlist_lock);
  
  #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
  		sizeof(struct __struct), __alignof__(struct __struct),\
  		(__flags), NULL)
  
  static int __init ksm_slab_init(void)
  {
  	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
  	if (!rmap_item_cache)
  		goto out;
  	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
  	if (!stable_node_cache)
  		goto out_free1;
  	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
  	if (!mm_slot_cache)
  		goto out_free2;
  
  	return 0;
  out_free2:
  	kmem_cache_destroy(stable_node_cache);
  out_free1:
  	kmem_cache_destroy(rmap_item_cache);
  out:
  	return -ENOMEM;
  }
  
  static void __init ksm_slab_free(void)
  {
  	kmem_cache_destroy(mm_slot_cache);
  	kmem_cache_destroy(stable_node_cache);
  	kmem_cache_destroy(rmap_item_cache);
  	mm_slot_cache = NULL;
  }
  
  static inline struct rmap_item *alloc_rmap_item(void)
  {
  	struct rmap_item *rmap_item;
  	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
  						__GFP_NORETRY | __GFP_NOWARN);
  	if (rmap_item)
  		ksm_rmap_items++;
  	return rmap_item;
  }
  
  static inline void free_rmap_item(struct rmap_item *rmap_item)
  {
  	ksm_rmap_items--;
  	rmap_item->mm = NULL;	/* debug safety */
  	kmem_cache_free(rmap_item_cache, rmap_item);
  }
  static inline struct stable_node *alloc_stable_node(void)
  {
  	/*
  	 * The allocation can take too long with GFP_KERNEL when memory is under
  	 * pressure, which may lead to hung task warnings.  Adding __GFP_HIGH
  	 * grants access to memory reserves, helping to avoid this problem.
  	 */
  	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH);
  }
  
  static inline void free_stable_node(struct stable_node *stable_node)
  {
  	kmem_cache_free(stable_node_cache, stable_node);
  }
  static inline struct mm_slot *alloc_mm_slot(void)
  {
  	if (!mm_slot_cache)	/* initialization failed */
  		return NULL;
  	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
  }
  
  static inline void free_mm_slot(struct mm_slot *mm_slot)
  {
  	kmem_cache_free(mm_slot_cache, mm_slot);
  }
  static struct mm_slot *get_mm_slot(struct mm_struct *mm)
  {
  	struct mm_slot *slot;
  	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
  		if (slot->mm == mm)
  			return slot;

  	return NULL;
  }
  
  static void insert_to_mm_slots_hash(struct mm_struct *mm,
  				    struct mm_slot *mm_slot)
  {
  	mm_slot->mm = mm;
  	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
  }
  /*
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
   * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
   * a special flag: they can just back out as soon as mm_users goes to zero.
   * ksm_test_exit() is used throughout to make this test for exit: in some
   * places for correctness, in some places just to avoid unnecessary work.
   */
  static inline bool ksm_test_exit(struct mm_struct *mm)
  {
  	return atomic_read(&mm->mm_users) == 0;
  }
  
  /*
   * We use break_ksm to break COW on a ksm page: it's a stripped down
   *
   *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
   *		put_page(page);
   *
   * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
   * in case the application has unmapped and remapped mm,addr meanwhile.
   * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
   * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
   *
   * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
   * of the process that owns 'vma'.  We also do not want to enforce
   * protection keys here anyway.
   */
  static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
  {
  	struct page *page;
  	int ret = 0;
  
  	do {
  		cond_resched();
  		page = follow_page(vma, addr,
  				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
  		if (IS_ERR_OR_NULL(page))
  			break;
  		if (PageKsm(page))
  			ret = handle_mm_fault(vma, addr,
  					FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
  		else
  			ret = VM_FAULT_WRITE;
  		put_page(page);
  	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
  	/*
  	 * We must loop because handle_mm_fault() may back out if there's
  	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
  	 *
  	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
  	 * COW has been broken, even if the vma does not permit VM_WRITE;
  	 * but note that a concurrent fault might break PageKsm for us.
  	 *
  	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
  	 * backing file, which also invalidates anonymous pages: that's
  	 * okay, that truncation will have unmapped the PageKsm for us.
  	 *
  	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
  	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
  	 * current task has TIF_MEMDIE set, and will be OOM killed on return
  	 * to user; and ksmd, having no mm, would never be chosen for that.
  	 *
  	 * But if the mm is in a limited mem_cgroup, then the fault may fail
  	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
  	 * even ksmd can fail in this way - though it's usually breaking ksm
  	 * just to undo a merge it made a moment before, so unlikely to oom.
  	 *
  	 * That's a pity: we might therefore have more kernel pages allocated
  	 * than we're counting as nodes in the stable tree; but ksm_do_scan
  	 * will retry to break_cow on each pass, so should recover the page
  	 * in due course.  The important thing is to not let VM_MERGEABLE
  	 * be cleared while any such pages might remain in the area.
  	 */
  	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
  }
  static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
  		unsigned long addr)
  {
  	struct vm_area_struct *vma;
  	if (ksm_test_exit(mm))
  		return NULL;
  	vma = find_vma(mm, addr);
  	if (!vma || vma->vm_start > addr)
  		return NULL;
  	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  		return NULL;
  	return vma;
  }
  static void break_cow(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	/*
  	 * It is not an accident that whenever we want to break COW
  	 * to undo, we also need to drop a reference to the anon_vma.
  	 */
  	put_anon_vma(rmap_item->anon_vma);

  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, addr);
  	if (vma)
  		break_ksm(vma, addr);
  	up_read(&mm->mmap_sem);
  }
  
  static struct page *get_mergeable_page(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	struct page *page;
  
  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, addr);
  	if (!vma)
  		goto out;
  
  	page = follow_page(vma, addr, FOLL_GET);
  	if (IS_ERR_OR_NULL(page))
  		goto out;
  	if (PageAnon(page)) {
  		flush_anon_page(vma, page, addr);
  		flush_dcache_page(page);
  	} else {
  		put_page(page);
  out:
  		page = NULL;
  	}
  	up_read(&mm->mmap_sem);
  	return page;
  }
  /*
   * This helper is used for getting right index into array of tree roots.
   * When merge_across_nodes knob is set to 1, there are only two rb-trees for
   * stable and unstable pages from all nodes with roots in index 0. Otherwise,
   * every node has its own stable and unstable tree.
   */
  static inline int get_kpfn_nid(unsigned long kpfn)
  {
  	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
  }
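  /*
   * For example, with merge_across_nodes unset on a two-node machine,
   * get_kpfn_nid() above places a ksm page whose pfn lies on node 1 under
   * root_stable_tree[1]; with merge_across_nodes set, index 0 is shared.
   */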
  static void remove_node_from_stable_tree(struct stable_node *stable_node)
  {
  	struct rmap_item *rmap_item;

  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
  		if (rmap_item->hlist.next)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
  		put_anon_vma(rmap_item->anon_vma);
  		rmap_item->address &= PAGE_MASK;
  		cond_resched();
  	}
  	if (stable_node->head == &migrate_nodes)
  		list_del(&stable_node->list);
  	else
  		rb_erase(&stable_node->node,
  			 root_stable_tree + NUMA(stable_node->nid));
  	free_stable_node(stable_node);
  }
  
  /*
   * get_ksm_page: checks if the page indicated by the stable node
   * is still its ksm page, despite having held no reference to it.
   * In which case we can trust the content of the page, and it
   * returns the gotten page; but if the page has now been zapped,
   * remove the stale node from the stable tree and return NULL.
   * But beware, the stable node's page might be being migrated.
   *
   * You would expect the stable_node to hold a reference to the ksm page.
   * But if it increments the page's count, swapping out has to wait for
   * ksmd to come around again before it can free the page, which may take
   * seconds or even minutes: much too unresponsive.  So instead we use a
   * "keyhole reference": access to the ksm page from the stable node peeps
   * out through its keyhole to see if that page still holds the right key,
   * pointing back to this stable node.  This relies on freeing a PageAnon
   * page to reset its page->mapping to NULL, and relies on no other use of
   * a page to put something that might look like our key in page->mapping.
   * is on its way to being freed; but it is an anomaly to bear in mind.
   */
  static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
  {
  	struct page *page;
  	void *expected_mapping;
  	unsigned long kpfn;

  	expected_mapping = (void *)((unsigned long)stable_node |
  					PAGE_MAPPING_KSM);
  again:
  	kpfn = READ_ONCE(stable_node->kpfn);
  	page = pfn_to_page(kpfn);
  
  	/*
  	 * page is computed from kpfn, so on most architectures reading
  	 * page->mapping is naturally ordered after reading node->kpfn,
  	 * but on Alpha we need to be more careful.
  	 */
  	smp_read_barrier_depends();
  	if (READ_ONCE(page->mapping) != expected_mapping)
  		goto stale;
  
  	/*
  	 * We cannot do anything with the page while its refcount is 0.
  	 * Usually 0 means free, or tail of a higher-order page: in which
  	 * case this node is no longer referenced, and should be freed;
  	 * however, it might mean that the page is under page_freeze_refs().
  	 * The __remove_mapping() case is easy, again the node is now stale;
  	 * but if page is swapcache in migrate_page_move_mapping(), it might
  	 * still be our page, in which case it's essential to keep the node.
  	 */
  	while (!get_page_unless_zero(page)) {
  		/*
  		 * Another check for page->mapping != expected_mapping would
  		 * work here too.  We have chosen the !PageSwapCache test to
  		 * optimize the common case, when the page is or is about to
  		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
  		 * in the freeze_refs section of __remove_mapping(); but Anon
  		 * page->mapping reset to NULL later, in free_pages_prepare().
  		 */
  		if (!PageSwapCache(page))
  			goto stale;
  		cpu_relax();
  	}
  	if (READ_ONCE(page->mapping) != expected_mapping) {
  		put_page(page);
  		goto stale;
  	}

  	if (lock_it) {
  		lock_page(page);
  		if (READ_ONCE(page->mapping) != expected_mapping) {
  			unlock_page(page);
  			put_page(page);
  			goto stale;
  		}
  	}
  	return page;

  stale:
  	/*
  	 * We come here from above when page->mapping or !PageSwapCache
  	 * suggests that the node is stale; but it might be under migration.
  	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
  	 * before checking whether node->kpfn has been changed.
  	 */
  	smp_rmb();
  	if (READ_ONCE(stable_node->kpfn) != kpfn)
  		goto again;
  	remove_node_from_stable_tree(stable_node);
  	return NULL;
  }
  /*
   * Removing rmap_item from stable or unstable tree.
   * This function will clean the information from the stable/unstable tree.
   */
  static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
  {
  	if (rmap_item->address & STABLE_FLAG) {
  		struct stable_node *stable_node;
  		struct page *page;

  		stable_node = rmap_item->head;
  		page = get_ksm_page(stable_node, true);
  		if (!page)
  			goto out;

  		hlist_del(&rmap_item->hlist);
  		unlock_page(page);
  		put_page(page);

  		if (!hlist_empty(&stable_node->hlist))
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;

  		put_anon_vma(rmap_item->anon_vma);
  		rmap_item->address &= PAGE_MASK;

  	} else if (rmap_item->address & UNSTABLE_FLAG) {
  		unsigned char age;
  		/*
  		 * Usually ksmd can and must skip the rb_erase, because
  		 * root_unstable_tree was already reset to RB_ROOT.
  		 * But be careful when an mm is exiting: do the rb_erase
  		 * if this rmap_item was inserted by this scan, rather
  		 * than left over from before.
  		 */
  		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
  		BUG_ON(age > 1);
  		if (!age)
  			rb_erase(&rmap_item->node,
  				 root_unstable_tree + NUMA(rmap_item->nid));
  		ksm_pages_unshared--;
  		rmap_item->address &= PAGE_MASK;
  	}
  out:
  	cond_resched();		/* we're called from many long loops */
  }
  static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
  				       struct rmap_item **rmap_list)
  {
  	while (*rmap_list) {
  		struct rmap_item *rmap_item = *rmap_list;
  		*rmap_list = rmap_item->rmap_list;
  		remove_rmap_item_from_tree(rmap_item);
  		free_rmap_item(rmap_item);
  	}
  }
  
  /*
   * Though it's very tempting to unmerge rmap_items from stable tree rather
   * than check every pte of a given vma, the locking doesn't quite work for
   * that - an rmap_item is assigned to the stable tree after inserting ksm
   * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
   * rmap_items from parent to child at fork time (so as not to waste time
   * if exit comes before the next scan reaches it).
   *
   * Similarly, although we'd like to remove rmap_items (so updating counts
   * and freeing memory) when unmerging an area, it's easier to leave that
   * to the next pass of ksmd - consider, for example, how ksmd might be
   * in cmp_and_merge_page on one of the rmap_items we would be removing.
   */
  static int unmerge_ksm_pages(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	unsigned long addr;
  	int err = 0;

  	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
  		if (ksm_test_exit(vma->vm_mm))
  			break;
  		if (signal_pending(current))
  			err = -ERESTARTSYS;
  		else
  			err = break_ksm(vma, addr);
  	}
  	return err;
  }
  #ifdef CONFIG_SYSFS
  /*
   * Only called through the sysfs control interface:
   */
  static int remove_stable_node(struct stable_node *stable_node)
  {
  	struct page *page;
  	int err;
  
  	page = get_ksm_page(stable_node, true);
  	if (!page) {
  		/*
  		 * get_ksm_page did remove_node_from_stable_tree itself.
  		 */
  		return 0;
  	}
  	if (WARN_ON_ONCE(page_mapped(page))) {
  		/*
  		 * This should not happen: but if it does, just refuse to let
  		 * merge_across_nodes be switched - there is no need to panic.
  		 */
  		err = -EBUSY;
  	} else {
  		/*
  		 * The stable node did not yet appear stale to get_ksm_page(),
  		 * since that allows for an unmapped ksm page to be recognized
  		 * right up until it is freed; but the node is safe to remove.
  		 * This page might be in a pagevec waiting to be freed,
  		 * or it might be PageSwapCache (perhaps under writeback),
  		 * or it might have been removed from swapcache a moment ago.
  		 */
  		set_page_stable_node(page, NULL);
  		remove_node_from_stable_tree(stable_node);
  		err = 0;
  	}
  
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
  
  static int remove_all_stable_nodes(void)
  {
  	struct stable_node *stable_node, *next;
  	int nid;
  	int err = 0;
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		while (root_stable_tree[nid].rb_node) {
  			stable_node = rb_entry(root_stable_tree[nid].rb_node,
  						struct stable_node, node);
  			if (remove_stable_node(stable_node)) {
  				err = -EBUSY;
  				break;	/* proceed to next nid */
  			}
  			cond_resched();
  		}
  	}
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
  		if (remove_stable_node(stable_node))
  			err = -EBUSY;
  		cond_resched();
  	}
  	return err;
  }
  static int unmerge_and_remove_all_rmap_items(void)
  {
  	struct mm_slot *mm_slot;
  	struct mm_struct *mm;
  	struct vm_area_struct *vma;
  	int err = 0;
  
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
  						struct mm_slot, mm_list);
  	spin_unlock(&ksm_mmlist_lock);

  	for (mm_slot = ksm_scan.mm_slot;
  			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
  		mm = mm_slot->mm;
  		down_read(&mm->mmap_sem);
  		for (vma = mm->mmap; vma; vma = vma->vm_next) {
  			if (ksm_test_exit(mm))
  				break;
  			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  				continue;
  			err = unmerge_ksm_pages(vma,
  						vma->vm_start, vma->vm_end);
  			if (err)
  				goto error;
  		}

  		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
  		up_read(&mm->mmap_sem);
  
  		spin_lock(&ksm_mmlist_lock);
  		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
  						struct mm_slot, mm_list);
  		if (ksm_test_exit(mm)) {
  			hash_del(&mm_slot->link);
  			list_del(&mm_slot->mm_list);
  			spin_unlock(&ksm_mmlist_lock);
  
  			free_mm_slot(mm_slot);
  			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  			mmdrop(mm);
  		} else
  			spin_unlock(&ksm_mmlist_lock);
  	}
  	/* Clean up stable nodes, but don't worry if some are still busy */
  	remove_all_stable_nodes();
  	ksm_scan.seqnr = 0;
  	return 0;
  
  error:
  	up_read(&mm->mmap_sem);
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = &ksm_mm_head;
  	spin_unlock(&ksm_mmlist_lock);
  	return err;
  }
  #endif /* CONFIG_SYSFS */

  static u32 calc_checksum(struct page *page)
  {
  	u32 checksum;
  	void *addr = kmap_atomic(page);
  	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
  	kunmap_atomic(addr);
  	return checksum;
  }
  
  static int memcmp_pages(struct page *page1, struct page *page2)
  {
  	char *addr1, *addr2;
  	int ret;
  	addr1 = kmap_atomic(page1);
  	addr2 = kmap_atomic(page2);
  	ret = memcmp(addr1, addr2, PAGE_SIZE);
  	kunmap_atomic(addr2);
  	kunmap_atomic(addr1);
  	return ret;
  }
  
  static inline int pages_identical(struct page *page1, struct page *page2)
  {
  	return !memcmp_pages(page1, page2);
  }
  
  static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  			      pte_t *orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long addr;
  	pte_t *ptep;
  	spinlock_t *ptl;
  	int swapped;
  	int err = -EFAULT;
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */
  
  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		goto out;
  	BUG_ON(PageTransCompound(page));
  
  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  	ptep = page_check_address(page, mm, addr, &ptl, 0);
  	if (!ptep)
  		goto out_mn;

  	if (pte_write(*ptep) || pte_dirty(*ptep)) {
  		pte_t entry;
  
  		swapped = PageSwapCache(page);
  		flush_cache_page(vma, addr, page_to_pfn(page));
  		/*
  		 * Ok this is tricky, when get_user_pages_fast() run it doesn't
  		 * take any lock, therefore the check that we are going to make
  		 * with the pagecount against the mapcount is racey and
  		 * O_DIRECT can happen right after the check.
  		 * So we clear the pte and flush the tlb before the check
  		 * this assure us that no O_DIRECT can happen after the check
  		 * or in the middle of the check.
  		 */
  		entry = ptep_clear_flush_notify(vma, addr, ptep);
  		/*
  		 * Check that no O_DIRECT or similar I/O is in progress on the
  		 * page
  		 */
  		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
  			set_pte_at(mm, addr, ptep, entry);
  			goto out_unlock;
  		}
  		if (pte_dirty(entry))
  			set_page_dirty(page);
  		entry = pte_mkclean(pte_wrprotect(entry));
  		set_pte_at_notify(mm, addr, ptep, entry);
  	}
  	*orig_pte = *ptep;
  	err = 0;
  
  out_unlock:
  	pte_unmap_unlock(ptep, ptl);
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  out:
  	return err;
  }
  
  /**
   * replace_page - replace page in vma by new ksm page
   * @vma:      vma that holds the pte pointing to page
   * @page:     the page we are replacing by kpage
   * @kpage:    the ksm page we replace page by
   * @orig_pte: the original value of the pte
   *
   * Returns 0 on success, -EFAULT on failure.
   */
  static int replace_page(struct vm_area_struct *vma, struct page *page,
  			struct page *kpage, pte_t orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	pmd_t *pmd;
  	pte_t *ptep;
  	spinlock_t *ptl;
  	unsigned long addr;
  	int err = -EFAULT;
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */

  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		goto out;
  	pmd = mm_find_pmd(mm, addr);
  	if (!pmd)
  		goto out;

  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
  	if (!pte_same(*ptep, orig_pte)) {
  		pte_unmap_unlock(ptep, ptl);
  		goto out_mn;
  	}
  	get_page(kpage);
  	page_add_anon_rmap(kpage, vma, addr, false);
  
  	flush_cache_page(vma, addr, pte_pfn(*ptep));
  	ptep_clear_flush_notify(vma, addr, ptep);
  	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

  	page_remove_rmap(page, false);
  	if (!page_mapped(page))
  		try_to_free_swap(page);
  	put_page(page);
  
  	pte_unmap_unlock(ptep, ptl);
  	err = 0;
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  out:
  	return err;
  }
  
  /*
   * try_to_merge_one_page - take two pages and merge them into one
   * @vma: the vma that holds the pte pointing to page
   * @page: the PageAnon page that we want to replace with kpage
   * @kpage: the PageKsm page that we want to map instead of page,
   *         or NULL the first time when we want to use page as kpage.
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_one_page(struct vm_area_struct *vma,
  				 struct page *page, struct page *kpage)
  {
  	pte_t orig_pte = __pte(0);
  	int err = -EFAULT;
  	if (page == kpage)			/* ksm page forked */
  		return 0;
  	if (!PageAnon(page))
  		goto out;
  	/*
  	 * We need the page lock to read a stable PageSwapCache in
  	 * write_protect_page().  We use trylock_page() instead of
  	 * lock_page() because we don't want to wait here - we
  	 * prefer to continue scanning and merging different pages,
  	 * then come back to this page when it is unlocked.
  	 */
  	if (!trylock_page(page))
  		goto out;
  
  	if (PageTransCompound(page)) {
  		err = split_huge_page(page);
  		if (err)
  			goto out_unlock;
  	}
  	/*
  	 * If this anonymous page is mapped only here, its pte may need
  	 * to be write-protected.  If it's mapped elsewhere, all of its
  	 * ptes are necessarily already write-protected.  But in either
  	 * case, we need to lock and check page_count is not raised.
  	 */
  	if (write_protect_page(vma, page, &orig_pte) == 0) {
  		if (!kpage) {
  			/*
  			 * While we hold page lock, upgrade page from
  			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
  			 * stable_tree_insert() will update stable_node.
  			 */
  			set_page_stable_node(page, NULL);
  			mark_page_accessed(page);
  			/*
  			 * Page reclaim just frees a clean page with no dirty
  			 * ptes: make sure that the ksm page would be swapped.
  			 */
  			if (!PageDirty(page))
  				SetPageDirty(page);
  			err = 0;
  		} else if (pages_identical(page, kpage))
  			err = replace_page(vma, page, kpage, orig_pte);
  	}

  	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
  		munlock_vma_page(page);
  		if (!PageMlocked(kpage)) {
  			unlock_page(page);
  			lock_page(kpage);
  			mlock_vma_page(kpage);
  			page = kpage;		/* for final unlock */
  		}
  	}

  out_unlock:
  	unlock_page(page);
  out:
  	return err;
  }
  
  /*
   * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
   * but no new kernel page is allocated: kpage must already be a ksm page.
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
  				      struct page *page, struct page *kpage)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	struct vm_area_struct *vma;
  	int err = -EFAULT;
  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, rmap_item->address);
  	if (!vma)
  		goto out;
  	err = try_to_merge_one_page(vma, page, kpage);
  	if (err)
  		goto out;
  	/* Unstable nid is in union with stable anon_vma: remove first */
  	remove_rmap_item_from_tree(rmap_item);
  	/* Must get reference to anon_vma while still holding mmap_sem */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
1038
1039
  	rmap_item->anon_vma = vma->anon_vma;
  	get_anon_vma(vma->anon_vma);
81464e306   Hugh Dickins   ksm: five little ...
1040
  out:
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1041
  	up_read(&mm->mmap_sem);
81464e306   Hugh Dickins   ksm: five little ...
1042
1043
1044
1045
  	return err;
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1046
1047
1048
   * try_to_merge_two_pages - take two identical pages and prepare them
   * to be merged into one page.
   *
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1049
1050
   * This function returns the kpage if we successfully merged two identical
   * pages into one ksm page, NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1051
   *
80e148226   Hugh Dickins   ksm: share anon p...
1052
   * Note that this function upgrades page to ksm page: if one of the pages
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1053
1054
   * is already a ksm page, try_to_merge_with_ksm_page should be used.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1055
1056
1057
1058
  static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  					   struct page *page,
  					   struct rmap_item *tree_rmap_item,
  					   struct page *tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1059
  {
80e148226   Hugh Dickins   ksm: share anon p...
1060
  	int err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1061

80e148226   Hugh Dickins   ksm: share anon p...
1062
  	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1063
  	if (!err) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1064
  		err = try_to_merge_with_ksm_page(tree_rmap_item,
80e148226   Hugh Dickins   ksm: share anon p...
1065
  							tree_page, page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1066
  		/*
81464e306   Hugh Dickins   ksm: five little ...
1067
1068
  		 * If that fails, we have a ksm page with only one pte
  		 * pointing to it: so break it.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1069
  		 */
4035c07a8   Hugh Dickins   ksm: take keyhole...
1070
  		if (err)
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1071
  			break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1072
  	}
80e148226   Hugh Dickins   ksm: share anon p...
1073
  	return err ? NULL : page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1074
1075
1076
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1077
   * stable_tree_search - search for page inside the stable tree
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1078
1079
1080
1081
   *
   * This function checks if there is a page inside the stable tree
   * with identical content to the page that we are scanning right now.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1082
   * This function returns the stable tree node of identical content if found,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1083
1084
   * NULL otherwise.
   */
62b61f611   Hugh Dickins   ksm: memory hotre...
1085
  static struct page *stable_tree_search(struct page *page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1086
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1087
  	int nid;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1088
  	struct rb_root *root;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1089
1090
1091
1092
  	struct rb_node **new;
  	struct rb_node *parent;
  	struct stable_node *stable_node;
  	struct stable_node *page_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1093

4146d2d67   Hugh Dickins   ksm: make !merge_...
1094
1095
1096
  	page_node = page_stable_node(page);
  	if (page_node && page_node->head != &migrate_nodes) {
  		/* ksm page forked */
08beca44d   Hugh Dickins   ksm: stable_node ...
1097
  		get_page(page);
62b61f611   Hugh Dickins   ksm: memory hotre...
1098
  		return page;
08beca44d   Hugh Dickins   ksm: stable_node ...
1099
  	}
90bd6fd31   Petr Holasek   ksm: allow trees ...
1100
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1101
  	root = root_stable_tree + nid;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1102
  again:
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1103
  	new = &root->rb_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1104
  	parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1105

4146d2d67   Hugh Dickins   ksm: make !merge_...
1106
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1107
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1108
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1109
  		cond_resched();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1110
  		stable_node = rb_entry(*new, struct stable_node, node);
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
1111
  		tree_page = get_ksm_page(stable_node, false);
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
  		if (!tree_page) {
  			/*
  			 * If we walked over a stale stable_node,
  			 * get_ksm_page() will call rb_erase() and it
  			 * may rebalance the tree from under us. So
  			 * restart the search from scratch. Returning
  			 * NULL would be safe too, but we'd generate
  			 * false negative insertions just because some
  			 * stable_node was stale.
  			 */
  			goto again;
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1124

4035c07a8   Hugh Dickins   ksm: take keyhole...
1125
  		ret = memcmp_pages(page, tree_page);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1126
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1127

4146d2d67   Hugh Dickins   ksm: make !merge_...
1128
  		parent = *new;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1129
  		if (ret < 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1130
  			new = &parent->rb_left;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1131
  		else if (ret > 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1132
  			new = &parent->rb_right;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1133
1134
1135
1136
1137
1138
1139
1140
1141
  		else {
  			/*
  			 * Lock and unlock the stable_node's page (which
  			 * might already have been migrated) so that page
  			 * migration is sure to notice its raised count.
  			 * It would be more elegant to return stable_node
  			 * than kpage, but that involves more changes.
  			 */
  			tree_page = get_ksm_page(stable_node, true);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1142
  			if (tree_page) {
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1143
  				unlock_page(tree_page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
  				if (get_kpfn_nid(stable_node->kpfn) !=
  						NUMA(stable_node->nid)) {
  					put_page(tree_page);
  					goto replace;
  				}
  				return tree_page;
  			}
  			/*
  			 * There is now a place for page_node, but the tree may
  			 * have been rebalanced, so re-evaluate parent and new.
  			 */
  			if (page_node)
  				goto again;
  			return NULL;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1158
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1159
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1160
1161
1162
1163
1164
1165
  	if (!page_node)
  		return NULL;
  
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	rb_link_node(&page_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1166
  	rb_insert_color(&page_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1167
1168
1169
1170
1171
1172
1173
  	get_page(page);
  	return page;
  
  replace:
  	if (page_node) {
  		list_del(&page_node->list);
  		DO_NUMA(page_node->nid = nid);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1174
  		rb_replace_node(&stable_node->node, &page_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1175
1176
  		get_page(page);
  	} else {
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1177
  		rb_erase(&stable_node->node, root);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1178
1179
1180
1181
1182
  		page = NULL;
  	}
  	stable_node->head = &migrate_nodes;
  	list_add(&stable_node->list, stable_node->head);
  	return page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1183
1184
1185
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1186
   * stable_tree_insert - insert stable tree node pointing to new ksm page
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1187
1188
   * into the stable tree.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1189
1190
   * This function returns the stable tree node just allocated on success,
   * NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1191
   */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1192
  static struct stable_node *stable_tree_insert(struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1193
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1194
1195
  	int nid;
  	unsigned long kpfn;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1196
  	struct rb_root *root;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1197
  	struct rb_node **new;
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1198
  	struct rb_node *parent;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1199
  	struct stable_node *stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1200

90bd6fd31   Petr Holasek   ksm: allow trees ...
1201
1202
  	kpfn = page_to_pfn(kpage);
  	nid = get_kpfn_nid(kpfn);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1203
  	root = root_stable_tree + nid;
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1204
1205
  again:
  	parent = NULL;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1206
  	new = &root->rb_node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1207

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1208
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1209
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1210
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1211
  		cond_resched();
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1212
  		stable_node = rb_entry(*new, struct stable_node, node);
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
1213
  		tree_page = get_ksm_page(stable_node, false);
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
  		if (!tree_page) {
  			/*
  			 * If we walked over a stale stable_node,
  			 * get_ksm_page() will call rb_erase() and it
  			 * may rebalance the tree from under us. So
  			 * restart the search from scratch. Returning
  			 * NULL would be safe too, but we'd generate
  			 * false negative insertions just because some
  			 * stable_node was stale.
  			 */
  			goto again;
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1226

4035c07a8   Hugh Dickins   ksm: take keyhole...
1227
1228
  		ret = memcmp_pages(kpage, tree_page);
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
  
  		parent = *new;
  		if (ret < 0)
  			new = &parent->rb_left;
  		else if (ret > 0)
  			new = &parent->rb_right;
  		else {
  			/*
  			 * It is not a bug that stable_tree_search() didn't
  			 * find this node: because at that time our page was
  			 * not yet write-protected, so may have changed since.
  			 */
  			return NULL;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1244
1245
1246
  	stable_node = alloc_stable_node();
  	if (!stable_node)
  		return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1247

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1248
  	INIT_HLIST_HEAD(&stable_node->hlist);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1249
  	stable_node->kpfn = kpfn;
08beca44d   Hugh Dickins   ksm: stable_node ...
1250
  	set_page_stable_node(kpage, stable_node);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1251
  	DO_NUMA(stable_node->nid = nid);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1252
  	rb_link_node(&stable_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1253
  	rb_insert_color(&stable_node->node, root);
08beca44d   Hugh Dickins   ksm: stable_node ...
1254

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1255
  	return stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1256
1257
1258
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1259
1260
   * unstable_tree_search_insert - search for identical page,
   * else insert rmap_item into the unstable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
   *
   * This function searches for a page in the unstable tree identical to the
   * page currently being scanned; and if no identical page is found in the
   * tree, we insert rmap_item as a new object into the unstable tree.
   *
   * This function returns pointer to rmap_item found to be identical
   * to the currently scanned page, NULL otherwise.
   *
   * This function does both searching and inserting, because they share
   * the same walking algorithm in an rbtree.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1272
1273
1274
1275
  static
  struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
  					      struct page *page,
  					      struct page **tree_pagep)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1276
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1277
1278
  	struct rb_node **new;
  	struct rb_root *root;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1279
  	struct rb_node *parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1280
1281
1282
  	int nid;
  
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1283
  	root = root_unstable_tree + nid;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1284
  	new = &root->rb_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1285
1286
1287
  
  	while (*new) {
  		struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1288
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1289
  		int ret;
d178f27fc   Hugh Dickins   ksm: cond_resched...
1290
  		cond_resched();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1291
  		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1292
  		tree_page = get_mergeable_page(tree_rmap_item);
c8f95ed1a   Andrea Arcangeli   ksm: unstable_tre...
1293
  		if (!tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1294
1295
1296
  			return NULL;
  
  		/*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1297
  		 * Don't substitute a ksm page for a forked page.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1298
  		 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1299
1300
  		if (page == tree_page) {
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1301
1302
  			return NULL;
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1303
  		ret = memcmp_pages(page, tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1304
1305
1306
  
  		parent = *new;
  		if (ret < 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1307
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1308
1309
  			new = &parent->rb_left;
  		} else if (ret > 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1310
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1311
  			new = &parent->rb_right;
b599cbdf1   Hugh Dickins   ksm: treat unstab...
1312
1313
1314
1315
1316
1317
1318
1319
1320
  		} else if (!ksm_merge_across_nodes &&
  			   page_to_nid(tree_page) != nid) {
  			/*
  			 * If tree_page has been migrated to another NUMA node,
  			 * it will be flushed out and put in the right unstable
			 * tree next time: only merge with it when merge_across_nodes is set.
  			 */
  			put_page(tree_page);
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1321
  		} else {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1322
  			*tree_pagep = tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1323
1324
1325
  			return tree_rmap_item;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1326
  	rmap_item->address |= UNSTABLE_FLAG;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1327
  	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1328
  	DO_NUMA(rmap_item->nid = nid);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1329
  	rb_link_node(&rmap_item->node, parent, new);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1330
  	rb_insert_color(&rmap_item->node, root);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1331

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
1332
  	ksm_pages_unshared++;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1333
1334
1335
1336
1337
1338
1339
1340
1341
  	return NULL;
  }
  
  /*
   * stable_tree_append - add another rmap_item to the linked list of
   * rmap_items hanging off a given node of the stable tree, all sharing
   * the same ksm page.
   */
  static void stable_tree_append(struct rmap_item *rmap_item,
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1342
  			       struct stable_node *stable_node)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1343
  {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1344
  	rmap_item->head = stable_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1345
  	rmap_item->address |= STABLE_FLAG;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1346
  	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
e178dfde3   Hugh Dickins   ksm: move pages_s...
1347

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1348
1349
1350
1351
  	if (rmap_item->hlist.next)
  		ksm_pages_sharing++;
  	else
  		ksm_pages_shared++;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1352
1353
1354
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1355
1356
1357
1358
   * cmp_and_merge_page - first see if page can be merged into the stable tree;
   * if not, compare checksum to previous and if it's the same, see if page can
   * be inserted into the unstable tree, or merged with a page already there and
   * both transferred to the stable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1359
1360
1361
1362
1363
1364
   *
   * @page: the page that we are searching identical page to.
   * @rmap_item: the reverse mapping into the virtual address of this page
   */
  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
  {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1365
  	struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1366
  	struct page *tree_page = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1367
  	struct stable_node *stable_node;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1368
  	struct page *kpage;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1369
1370
  	unsigned int checksum;
  	int err;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1371
1372
1373
1374
1375
  	stable_node = page_stable_node(page);
  	if (stable_node) {
  		if (stable_node->head != &migrate_nodes &&
  		    get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
  			rb_erase(&stable_node->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1376
  				 root_stable_tree + NUMA(stable_node->nid));
4146d2d67   Hugh Dickins   ksm: make !merge_...
1377
1378
1379
1380
1381
1382
1383
  			stable_node->head = &migrate_nodes;
  			list_add(&stable_node->list, stable_node->head);
  		}
  		if (stable_node->head != &migrate_nodes &&
  		    rmap_item->head == stable_node)
  			return;
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1384
1385
  
  	/* We first start with searching the page inside the stable tree */
62b61f611   Hugh Dickins   ksm: memory hotre...
1386
  	kpage = stable_tree_search(page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1387
1388
1389
1390
1391
1392
  	if (kpage == page && rmap_item->head == stable_node) {
  		put_page(kpage);
  		return;
  	}
  
  	remove_rmap_item_from_tree(rmap_item);
62b61f611   Hugh Dickins   ksm: memory hotre...
1393
  	if (kpage) {
08beca44d   Hugh Dickins   ksm: stable_node ...
1394
  		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1395
1396
1397
1398
1399
  		if (!err) {
  			/*
  			 * The page was successfully merged:
  			 * add its rmap_item to the stable tree.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1400
  			lock_page(kpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
1401
  			stable_tree_append(rmap_item, page_stable_node(kpage));
5ad646880   Hugh Dickins   ksm: let shared p...
1402
  			unlock_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1403
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1404
  		put_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1405
1406
1407
1408
  		return;
  	}
  
  	/*
4035c07a8   Hugh Dickins   ksm: take keyhole...
1409
1410
1411
1412
  	 * If the hash value of the page has changed from the last time
  	 * we calculated it, this page is changing frequently: therefore we
  	 * don't want to insert it in the unstable tree, and we don't want
  	 * to waste our time searching for something identical to it there.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1413
1414
1415
1416
1417
1418
  	 */
  	checksum = calc_checksum(page);
  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return;
  	}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1419
1420
  	tree_rmap_item =
  		unstable_tree_search_insert(rmap_item, page, &tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1421
  	if (tree_rmap_item) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1422
1423
1424
  		kpage = try_to_merge_two_pages(rmap_item, page,
  						tree_rmap_item, tree_page);
  		put_page(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1425
  		if (kpage) {
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
1426
1427
1428
1429
  			/*
  			 * The pages were successfully merged: insert new
  			 * node in the stable tree and add both rmap_items.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1430
  			lock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1431
1432
1433
1434
1435
  			stable_node = stable_tree_insert(kpage);
  			if (stable_node) {
  				stable_tree_append(tree_rmap_item, stable_node);
  				stable_tree_append(rmap_item, stable_node);
  			}
5ad646880   Hugh Dickins   ksm: let shared p...
1436
  			unlock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1437

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1438
1439
1440
1441
1442
1443
  			/*
  			 * If we fail to insert the page into the stable tree,
  			 * we will have 2 virtual addresses that are pointing
  			 * to a ksm page left outside the stable tree,
  			 * in which case we need to break_cow on both.
  			 */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1444
  			if (!stable_node) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1445
1446
  				break_cow(tree_rmap_item);
  				break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1447
1448
  			}
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1449
1450
1451
1452
  	}
  }
  
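/*
 * get_next_rmap_item - find or create the rmap_item for this address in
 * the mm_slot's rmap_list: stale items passed over on the way (addresses
 * now below the scan address) are unlinked from the trees and freed; if
 * no match is found, a fresh rmap_item is allocated and linked in.
 */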
  static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
1453
  					    struct rmap_item **rmap_list,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1454
1455
1456
  					    unsigned long addr)
  {
  	struct rmap_item *rmap_item;
6514d511d   Hugh Dickins   ksm: singly-linke...
1457
1458
  	while (*rmap_list) {
  		rmap_item = *rmap_list;
93d17715a   Hugh Dickins   ksm: three remove...
1459
  		if ((rmap_item->address & PAGE_MASK) == addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1460
  			return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1461
1462
  		if (rmap_item->address > addr)
  			break;
6514d511d   Hugh Dickins   ksm: singly-linke...
1463
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1464
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1465
1466
1467
1468
1469
1470
1471
1472
  		free_rmap_item(rmap_item);
  	}
  
  	rmap_item = alloc_rmap_item();
  	if (rmap_item) {
  		/* It has already been zeroed */
  		rmap_item->mm = mm_slot->mm;
  		rmap_item->address = addr;
6514d511d   Hugh Dickins   ksm: singly-linke...
1473
1474
  		rmap_item->rmap_list = *rmap_list;
  		*rmap_list = rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
  	}
  	return rmap_item;
  }
  
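/*
 * scan_get_next_rmap_item - advance ksm_scan to the next anonymous page
 * in a VM_MERGEABLE vma, returning its rmap_item with a reference held on
 * *page; moves the cursor on to the next mm_slot when this mm is
 * exhausted or exiting, and bumps ksm_scan.seqnr once the whole list has
 * been walked.
 */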
  static struct rmap_item *scan_get_next_rmap_item(struct page **page)
  {
  	struct mm_struct *mm;
  	struct mm_slot *slot;
  	struct vm_area_struct *vma;
  	struct rmap_item *rmap_item;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1485
  	int nid;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1486
1487
1488
1489
1490
1491
  
  	if (list_empty(&ksm_mm_head.mm_list))
  		return NULL;
  
  	slot = ksm_scan.mm_slot;
  	if (slot == &ksm_mm_head) {
2919bfd07   Hugh Dickins   ksm: drain pageve...
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
  		/*
  		 * A number of pages can hang around indefinitely on per-cpu
  		 * pagevecs, raised page count preventing write_protect_page
  		 * from merging them.  Though it doesn't really matter much,
  		 * it is puzzling to see some stuck in pages_volatile until
  		 * other activity jostles them out, and they also prevented
  		 * LTP's KSM test from succeeding deterministically; so drain
  		 * them here (here rather than on entry to ksm_do_scan(),
  		 * so we don't IPI too often when pages_to_scan is set low).
  		 */
  		lru_add_drain_all();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1503
1504
1505
1506
1507
1508
1509
  		/*
  		 * Whereas stale stable_nodes on the stable_tree itself
  		 * get pruned in the regular course of stable_tree_search(),
  		 * those moved out to the migrate_nodes list can accumulate:
  		 * so prune them once before each full scan.
  		 */
  		if (!ksm_merge_across_nodes) {
036404183   Geliang Tang   mm/ksm.c: use lis...
1510
  			struct stable_node *stable_node, *next;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1511
  			struct page *page;
036404183   Geliang Tang   mm/ksm.c: use lis...
1512
1513
  			list_for_each_entry_safe(stable_node, next,
  						 &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
1514
1515
1516
1517
1518
1519
  				page = get_ksm_page(stable_node, false);
  				if (page)
  					put_page(page);
  				cond_resched();
  			}
  		}
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1520
  		for (nid = 0; nid < ksm_nr_node_ids; nid++)
90bd6fd31   Petr Holasek   ksm: allow trees ...
1521
  			root_unstable_tree[nid] = RB_ROOT;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1522
1523
1524
1525
1526
  
  		spin_lock(&ksm_mmlist_lock);
  		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
  		ksm_scan.mm_slot = slot;
  		spin_unlock(&ksm_mmlist_lock);
2b472611a   Hugh Dickins   ksm: fix NULL poi...
1527
1528
1529
1530
1531
1532
  		/*
  		 * Although we tested list_empty() above, a racing __ksm_exit
  		 * of the last mm on the list may have removed it since then.
  		 */
  		if (slot == &ksm_mm_head)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1533
1534
  next_mm:
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
1535
  		ksm_scan.rmap_list = &slot->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1536
1537
1538
1539
  	}
  
  	mm = slot->mm;
  	down_read(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1540
1541
1542
1543
1544
1545
  	if (ksm_test_exit(mm))
  		vma = NULL;
  	else
  		vma = find_vma(mm, ksm_scan.address);
  
  	for (; vma; vma = vma->vm_next) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1546
1547
1548
1549
1550
1551
1552
1553
  		if (!(vma->vm_flags & VM_MERGEABLE))
  			continue;
  		if (ksm_scan.address < vma->vm_start)
  			ksm_scan.address = vma->vm_start;
  		if (!vma->anon_vma)
  			ksm_scan.address = vma->vm_end;
  
  		while (ksm_scan.address < vma->vm_end) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1554
1555
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1556
  			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
21ae5b017   Andrea Arcangeli   thp: skip transhu...
1557
1558
1559
1560
1561
  			if (IS_ERR_OR_NULL(*page)) {
  				ksm_scan.address += PAGE_SIZE;
  				cond_resched();
  				continue;
  			}
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1562
  			if (PageAnon(*page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1563
1564
1565
  				flush_anon_page(vma, *page, ksm_scan.address);
  				flush_dcache_page(*page);
  				rmap_item = get_next_rmap_item(slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
1566
  					ksm_scan.rmap_list, ksm_scan.address);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1567
  				if (rmap_item) {
6514d511d   Hugh Dickins   ksm: singly-linke...
1568
1569
  					ksm_scan.rmap_list =
  							&rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1570
1571
1572
1573
1574
1575
  					ksm_scan.address += PAGE_SIZE;
  				} else
  					put_page(*page);
  				up_read(&mm->mmap_sem);
  				return rmap_item;
  			}
21ae5b017   Andrea Arcangeli   thp: skip transhu...
1576
  			put_page(*page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1577
1578
1579
1580
  			ksm_scan.address += PAGE_SIZE;
  			cond_resched();
  		}
  	}
9ba692948   Hugh Dickins   ksm: fix oom dead...
1581
1582
  	if (ksm_test_exit(mm)) {
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
1583
  		ksm_scan.rmap_list = &slot->rmap_list;
9ba692948   Hugh Dickins   ksm: fix oom dead...
1584
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1585
1586
1587
1588
  	/*
  	 * Nuke all the rmap_items that are above this current rmap:
  	 * because there were no VM_MERGEABLE vmas with such addresses.
  	 */
6514d511d   Hugh Dickins   ksm: singly-linke...
1589
  	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1590
1591
  
  	spin_lock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
1592
1593
1594
1595
1596
1597
1598
  	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
  						struct mm_slot, mm_list);
  	if (ksm_scan.address == 0) {
  		/*
  		 * We've completed a full scan of all vmas, holding mmap_sem
  		 * throughout, and found no VM_MERGEABLE: so do the same as
  		 * __ksm_exit does to remove this mm from all our lists now.
9ba692948   Hugh Dickins   ksm: fix oom dead...
1599
1600
1601
1602
  		 * This applies either when cleaning up after __ksm_exit
  		 * (but beware: we can reach here even before __ksm_exit),
  		 * or when all VM_MERGEABLE areas have been unmapped (and
  		 * mmap_sem then protects against race with MADV_MERGEABLE).
cd551f975   Hugh Dickins   ksm: distribute r...
1603
  		 */
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
1604
  		hash_del(&slot->link);
cd551f975   Hugh Dickins   ksm: distribute r...
1605
  		list_del(&slot->mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1606
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
1607
1608
  		free_mm_slot(slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1609
1610
1611
  		up_read(&mm->mmap_sem);
  		mmdrop(mm);
  	} else {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1612
  		up_read(&mm->mmap_sem);
7496fea9a   Zhou Chengming   ksm: fix conflict...
1613
1614
1615
1616
1617
1618
1619
1620
  		/*
  		 * up_read(&mm->mmap_sem) first because after
  		 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
  		 * already have been freed under us by __ksm_exit()
  		 * because the "mm_slot" is still hashed and
  		 * ksm_scan.mm_slot doesn't point to it anymore.
  		 */
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
1621
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1622
1623
  
  	/* Repeat until we've completed scanning the whole list */
cd551f975   Hugh Dickins   ksm: distribute r...
1624
  	slot = ksm_scan.mm_slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1625
1626
  	if (slot != &ksm_mm_head)
  		goto next_mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
  	ksm_scan.seqnr++;
  	return NULL;
  }
  
  /**
   * ksm_do_scan  - the ksm scanner main worker function.
   * @scan_npages - number of pages we want to scan before we return.
   */
  static void ksm_do_scan(unsigned int scan_npages)
  {
  	struct rmap_item *rmap_item;
22eccdd7d   Dan Carpenter   ksm: check for ER...
1638
  	struct page *uninitialized_var(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1639

878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1640
  	while (scan_npages-- && likely(!freezing(current))) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1641
1642
1643
1644
  		cond_resched();
  		rmap_item = scan_get_next_rmap_item(&page);
  		if (!rmap_item)
  			return;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1645
  		cmp_and_merge_page(page, rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1646
1647
1648
  		put_page(page);
  	}
  }
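
/*
 * ksmd has work to do only while KSM_RUN_MERGE is set and at least one
 * mm is registered on ksm_mm_head's list.
 */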
6e1583842   Hugh Dickins   ksm: keep quiet w...
1649
1650
1651
1652
  static int ksmd_should_run(void)
  {
  	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
  }
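
/*
 * ksm_scan_thread - ksmd's main loop: under ksm_thread_mutex (and not
 * while memory is going offline), scan pages_to_scan pages, then sleep
 * for sleep_millisecs or wait until there is work to do; freezable
 * throughout.
 */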
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1653
1654
  static int ksm_scan_thread(void *nothing)
  {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1655
  	set_freezable();
339aa6246   Izik Eidus   ksm: change ksm n...
1656
  	set_user_nice(current, 5);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1657
1658
  
  	while (!kthread_should_stop()) {
6e1583842   Hugh Dickins   ksm: keep quiet w...
1659
  		mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1660
  		wait_while_offlining();
6e1583842   Hugh Dickins   ksm: keep quiet w...
1661
  		if (ksmd_should_run())
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1662
  			ksm_do_scan(ksm_thread_pages_to_scan);
6e1583842   Hugh Dickins   ksm: keep quiet w...
1663
  		mutex_unlock(&ksm_thread_mutex);
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1664
  		try_to_freeze();
6e1583842   Hugh Dickins   ksm: keep quiet w...
1665
  		if (ksmd_should_run()) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1666
1667
1668
  			schedule_timeout_interruptible(
  				msecs_to_jiffies(ksm_thread_sleep_millisecs));
  		} else {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
1669
  			wait_event_freezable(ksm_thread_wait,
6e1583842   Hugh Dickins   ksm: keep quiet w...
1670
  				ksmd_should_run() || kthread_should_stop());
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1671
1672
1673
1674
  		}
  	}
  	return 0;
  }
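
/*
 * ksm_madvise - handle MADV_MERGEABLE and MADV_UNMERGEABLE from madvise(2):
 * set or clear VM_MERGEABLE on the vma (silently ignoring the advice for
 * special mappings), registering the mm with ksmd on first use and
 * unmerging any shared pages in the range on MADV_UNMERGEABLE.
 * Userspace typically offers an area with madvise(addr, length,
 * MADV_MERGEABLE).
 */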
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1675
1676
1677
1678
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  		unsigned long end, int advice, unsigned long *vm_flags)
  {
  	struct mm_struct *mm = vma->vm_mm;
d952b7913   Hugh Dickins   ksm: fix endless ...
1679
  	int err;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1680
1681
1682
1683
1684
1685
1686
1687
  
  	switch (advice) {
  	case MADV_MERGEABLE:
  		/*
  		 * Be somewhat over-protective for now!
  		 */
  		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
  				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
0661a3361   Kirill A. Shutemov   mm: remove rest u...
1688
  				 VM_HUGETLB | VM_MIXEDMAP))
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1689
  			return 0;		/* just ignore the advice */
cc2383ec0   Konstantin Khlebnikov   mm: introduce arc...
1690
1691
1692
1693
  #ifdef VM_SAO
  		if (*vm_flags & VM_SAO)
  			return 0;
  #endif
d952b7913   Hugh Dickins   ksm: fix endless ...
1694
1695
1696
1697
1698
  		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
  			err = __ksm_enter(mm);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1699
1700
1701
1702
1703
1704
1705
  
  		*vm_flags |= VM_MERGEABLE;
  		break;
  
  	case MADV_UNMERGEABLE:
  		if (!(*vm_flags & VM_MERGEABLE))
  			return 0;		/* just ignore the advice */
d952b7913   Hugh Dickins   ksm: fix endless ...
1706
1707
1708
1709
1710
  		if (vma->anon_vma) {
  			err = unmerge_ksm_pages(vma, start, end);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
  
  		*vm_flags &= ~VM_MERGEABLE;
  		break;
  	}
  
  	return 0;
  }
  
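/*
 * __ksm_enter - set up KSM tracking for an mm on its first MADV_MERGEABLE:
 * allocate and hash its mm_slot, insert it into the scan list (position
 * explained below), pin the mm with a reference, and wake ksmd if the
 * list was previously empty.
 */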
  int __ksm_enter(struct mm_struct *mm)
  {
6e1583842   Hugh Dickins   ksm: keep quiet w...
1721
1722
1723
1724
  	struct mm_slot *mm_slot;
  	int needs_wakeup;
  
  	mm_slot = alloc_mm_slot();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1725
1726
  	if (!mm_slot)
  		return -ENOMEM;
6e1583842   Hugh Dickins   ksm: keep quiet w...
1727
1728
  	/* Check ksm_run too?  Would need tighter locking */
  	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1729
1730
1731
  	spin_lock(&ksm_mmlist_lock);
  	insert_to_mm_slots_hash(mm, mm_slot);
  	/*
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1732
1733
  	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
  	 * insert just behind the scanning cursor, to let the area settle
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1734
1735
  	 * down a little; when fork is followed by immediate exec, we don't
  	 * want ksmd to waste time setting up and tearing down an rmap_list.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1736
1737
1738
1739
  	 *
  	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
  	 * scanning cursor, otherwise KSM pages in newly forked mms will be
  	 * missed: then we might as well insert at the end of the list.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1740
  	 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1741
1742
1743
1744
  	if (ksm_run & KSM_RUN_UNMERGE)
  		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
  	else
  		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1745
  	spin_unlock(&ksm_mmlist_lock);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1746
  	set_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1747
  	atomic_inc(&mm->mm_count);
6e1583842   Hugh Dickins   ksm: keep quiet w...
1748
1749
1750
  
  	if (needs_wakeup)
  		wake_up_interruptible(&ksm_thread_wait);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1751
1752
  	return 0;
  }
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
1753
  void __ksm_exit(struct mm_struct *mm)
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1754
  {
cd551f975   Hugh Dickins   ksm: distribute r...
1755
  	struct mm_slot *mm_slot;
9ba692948   Hugh Dickins   ksm: fix oom dead...
1756
  	int easy_to_free = 0;
cd551f975   Hugh Dickins   ksm: distribute r...
1757

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1758
  	/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
1759
1760
1761
1762
1763
1764
  	 * This process is exiting: if it's straightforward (as is the
  	 * case when ksmd was never running), free mm_slot immediately.
  	 * But if it's at the cursor or has rmap_items linked to it, use
  	 * mmap_sem to synchronize with any break_cows before pagetables
  	 * are freed, and leave the mm_slot on the list for ksmd to free.
  	 * Beware: ksm may already have noticed it exiting and freed the slot.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1765
  	 */
9ba692948   Hugh Dickins   ksm: fix oom dead...
1766

cd551f975   Hugh Dickins   ksm: distribute r...
1767
1768
  	spin_lock(&ksm_mmlist_lock);
  	mm_slot = get_mm_slot(mm);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1769
  	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
6514d511d   Hugh Dickins   ksm: singly-linke...
1770
  		if (!mm_slot->rmap_list) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
1771
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1772
1773
1774
1775
1776
1777
  			list_del(&mm_slot->mm_list);
  			easy_to_free = 1;
  		} else {
  			list_move(&mm_slot->mm_list,
  				  &ksm_scan.mm_slot->mm_list);
  		}
cd551f975   Hugh Dickins   ksm: distribute r...
1778
  	}
cd551f975   Hugh Dickins   ksm: distribute r...
1779
  	spin_unlock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1780
1781
1782
1783
1784
  	if (easy_to_free) {
  		free_mm_slot(mm_slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		mmdrop(mm);
  	} else if (mm_slot) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
1785
1786
  		down_write(&mm->mmap_sem);
  		up_write(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
1787
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1788
  }
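
/*
 * ksm_might_need_to_copy - on swapin, the page found may be a KSM page,
 * or an anonymous page that now belongs to a different anon_vma or index:
 * in those cases return a freshly allocated copy for this vma and
 * address, otherwise return the original page unchanged.
 */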
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1789
  struct page *ksm_might_need_to_copy(struct page *page,
5ad646880   Hugh Dickins   ksm: let shared p...
1790
1791
  			struct vm_area_struct *vma, unsigned long address)
  {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1792
  	struct anon_vma *anon_vma = page_anon_vma(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1793
  	struct page *new_page;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
  	if (PageKsm(page)) {
  		if (page_stable_node(page) &&
  		    !(ksm_run & KSM_RUN_UNMERGE))
  			return page;	/* no need to copy it */
  	} else if (!anon_vma) {
  		return page;		/* no need to copy it */
  	} else if (anon_vma->root == vma->anon_vma->root &&
  		 page->index == linear_page_index(vma, address)) {
  		return page;		/* still no need to copy it */
  	}
  	if (!PageUptodate(page))
  		return page;		/* let do_swap_page report the error */
5ad646880   Hugh Dickins   ksm: let shared p...
1806
1807
1808
1809
1810
1811
  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
  	if (new_page) {
  		copy_user_highpage(new_page, page, address, vma);
  
  		SetPageDirty(new_page);
  		__SetPageUptodate(new_page);
48c935ad8   Kirill A. Shutemov   page-flags: defin...
1812
  		__SetPageLocked(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
1813
  	}
5ad646880   Hugh Dickins   ksm: let shared p...
1814
1815
  	return new_page;
  }
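
/*
 * rmap_walk_ksm - rmap walk over a KSM page: for every rmap_item hanging
 * off its stable_node, apply rwc->rmap_one to each vma which maps it,
 * first in the original mms and then (if the walk has not finished) in
 * any mms forked from them.
 */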
051ac83ad   Joonsoo Kim   mm/rmap: make rma...
1816
  int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1817
1818
  {
  	struct stable_node *stable_node;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1819
1820
1821
  	struct rmap_item *rmap_item;
  	int ret = SWAP_AGAIN;
  	int search_new_forks = 0;
309381fea   Sasha Levin   mm: dump page whe...
1822
  	VM_BUG_ON_PAGE(!PageKsm(page), page);
9f32624be   Joonsoo Kim   mm/rmap: use rmap...
1823
1824
1825
1826
1827
  
  	/*
  	 * Rely on the page lock to protect against concurrent modifications
  	 * to that page's node of the stable tree.
  	 */
309381fea   Sasha Levin   mm: dump page whe...
1828
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1829
1830
1831
1832
1833
  
  	stable_node = page_stable_node(page);
  	if (!stable_node)
  		return ret;
  again:
b67bfe0d4   Sasha Levin   hlist: drop the n...
1834
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1835
  		struct anon_vma *anon_vma = rmap_item->anon_vma;
5beb49305   Rik van Riel   mm: change anon_v...
1836
  		struct anon_vma_chain *vmac;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1837
  		struct vm_area_struct *vma;
ad12695f1   Andrea Arcangeli   ksm: add cond_res...
1838
  		cond_resched();
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1839
  		anon_vma_lock_read(anon_vma);
bf181b9f9   Michel Lespinasse   mm anon rmap: rep...
1840
1841
  		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
  					       0, ULONG_MAX) {
ad12695f1   Andrea Arcangeli   ksm: add cond_res...
1842
  			cond_resched();
5beb49305   Rik van Riel   mm: change anon_v...
1843
  			vma = vmac->vma;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
  			if (rmap_item->address < vma->vm_start ||
  			    rmap_item->address >= vma->vm_end)
  				continue;
  			/*
  			 * Initially we examine only the vma which covers this
  			 * rmap_item; but later, if there is still work to do,
  			 * we examine covering vmas in other mms: in case they
  			 * were forked from the original since ksmd passed.
  			 */
  			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
  				continue;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1855
1856
  			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  				continue;
051ac83ad   Joonsoo Kim   mm/rmap: make rma...
1857
1858
  			ret = rwc->rmap_one(page, vma,
  					rmap_item->address, rwc->arg);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1859
  			if (ret != SWAP_AGAIN) {
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1860
  				anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1861
1862
  				goto out;
  			}
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
1863
1864
1865
1866
  			if (rwc->done && rwc->done(page)) {
  				anon_vma_unlock_read(anon_vma);
  				goto out;
  			}
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1867
  		}
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
1868
  		anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1869
1870
1871
1872
1873
1874
  	}
  	if (!search_new_forks++)
  		goto again;
  out:
  	return ret;
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
1875
  #ifdef CONFIG_MIGRATION
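/*
 * ksm_migrate_page - page migration fixup for a KSM page: move the
 * stable_node's kpfn over to the new page, with the smp_wmb() below
 * ordering that store before the old page's stable_node pointer is
 * cleared.
 */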
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1876
1877
1878
  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
  {
  	struct stable_node *stable_node;
309381fea   Sasha Levin   mm: dump page whe...
1879
1880
1881
  	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
  	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
  	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1882
1883
1884
  
  	stable_node = page_stable_node(newpage);
  	if (stable_node) {
309381fea   Sasha Levin   mm: dump page whe...
1885
  		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
1886
  		stable_node->kpfn = page_to_pfn(newpage);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1887
1888
1889
1890
1891
1892
1893
1894
  		/*
  		 * newpage->mapping was set in advance; now we need smp_wmb()
  		 * to make sure that the new stable_node->kpfn is visible
  		 * to get_ksm_page() before it can see that oldpage->mapping
  		 * has gone stale (or that PageSwapCache has been cleared).
  		 */
  		smp_wmb();
  		set_page_stable_node(oldpage, NULL);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
1895
1896
1897
  	}
  }
  #endif /* CONFIG_MIGRATION */
62b61f611   Hugh Dickins   ksm: memory hotre...
1898
  #ifdef CONFIG_MEMORY_HOTREMOVE
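/*
 * While a memory section is going offline, the notifier below keeps
 * KSM_RUN_OFFLINE set in ksm_run; wait_while_offlining() drops
 * ksm_thread_mutex and sleeps on that bit, keeping ksmd and the sysfs
 * writers away from the stable tree until offlining completes.
 */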
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1899
1900
1901
1902
1903
  static void wait_while_offlining(void)
  {
  	while (ksm_run & KSM_RUN_OFFLINE) {
  		mutex_unlock(&ksm_thread_mutex);
  		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
743162013   NeilBrown   sched: Remove pro...
1904
  			    TASK_UNINTERRUPTIBLE);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1905
1906
1907
  		mutex_lock(&ksm_thread_mutex);
  	}
  }
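
/*
 * ksm_check_stable_tree - prune stable_nodes whose kpfn falls in the pfn
 * range being offlined, both from the per-NUMA-node rbtrees and from the
 * migrate_nodes list.
 */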
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1908
1909
  static void ksm_check_stable_tree(unsigned long start_pfn,
  				  unsigned long end_pfn)
62b61f611   Hugh Dickins   ksm: memory hotre...
1910
  {
036404183   Geliang Tang   mm/ksm.c: use lis...
1911
  	struct stable_node *stable_node, *next;
62b61f611   Hugh Dickins   ksm: memory hotre...
1912
  	struct rb_node *node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1913
  	int nid;
62b61f611   Hugh Dickins   ksm: memory hotre...
1914

ef53d16cd   Hugh Dickins   ksm: allocate roo...
1915
1916
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1917
  		while (node) {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1918
1919
  			stable_node = rb_entry(node, struct stable_node, node);
  			if (stable_node->kpfn >= start_pfn &&
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1920
1921
1922
1923
1924
1925
  			    stable_node->kpfn < end_pfn) {
  				/*
  				 * Don't get_ksm_page, page has already gone:
  				 * which is why we keep kpfn instead of page*
  				 */
  				remove_node_from_stable_tree(stable_node);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1926
  				node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1927
1928
1929
  			} else
  				node = rb_next(node);
  			cond_resched();
90bd6fd31   Petr Holasek   ksm: allow trees ...
1930
  		}
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1931
  	}
036404183   Geliang Tang   mm/ksm.c: use lis...
1932
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
1933
1934
1935
1936
1937
  		if (stable_node->kpfn >= start_pfn &&
  		    stable_node->kpfn < end_pfn)
  			remove_node_from_stable_tree(stable_node);
  		cond_resched();
  	}
62b61f611   Hugh Dickins   ksm: memory hotre...
1938
1939
1940
1941
1942
1943
  }
  
  static int ksm_memory_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
62b61f611   Hugh Dickins   ksm: memory hotre...
1944
1945
1946
1947
  
  	switch (action) {
  	case MEM_GOING_OFFLINE:
  		/*
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1948
1949
1950
1951
1952
  		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
  		 * and remove_all_stable_nodes() while memory is going offline:
  		 * it is unsafe for them to touch the stable tree at this time.
  		 * But unmerge_ksm_pages(), rmap lookups and other entry points
  		 * which do not need the ksm_thread_mutex are all safe.
62b61f611   Hugh Dickins   ksm: memory hotre...
1953
  		 */
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1954
1955
1956
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run |= KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
62b61f611   Hugh Dickins   ksm: memory hotre...
1957
1958
1959
1960
1961
1962
  		break;
  
  	case MEM_OFFLINE:
  		/*
  		 * Most of the work is done by page migration; but there might
  		 * be a few stable_nodes left over, still pointing to struct
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1963
1964
1965
  		 * pages which have been offlined: prune those from the tree,
  		 * otherwise get_ksm_page() might later try to access a
  		 * non-existent struct page.
62b61f611   Hugh Dickins   ksm: memory hotre...
1966
  		 */
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
1967
1968
  		ksm_check_stable_tree(mn->start_pfn,
  				      mn->start_pfn + mn->nr_pages);
62b61f611   Hugh Dickins   ksm: memory hotre...
1969
1970
1971
  		/* fallthrough */
  
  	case MEM_CANCEL_OFFLINE:
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1972
1973
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run &= ~KSM_RUN_OFFLINE;
62b61f611   Hugh Dickins   ksm: memory hotre...
1974
  		mutex_unlock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1975
1976
1977
  
  		smp_mb();	/* wake_up_bit advises this */
  		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
62b61f611   Hugh Dickins   ksm: memory hotre...
1978
1979
1980
1981
  		break;
  	}
  	return NOTIFY_OK;
  }
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
1982
1983
1984
1985
  #else
  static void wait_while_offlining(void)
  {
  }
62b61f611   Hugh Dickins   ksm: memory hotre...
1986
  #endif /* CONFIG_MEMORY_HOTREMOVE */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
1987
1988
1989
1990
  #ifdef CONFIG_SYSFS
  /*
   * This all compiles without CONFIG_SYSFS, but is a waste of space.
   */
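
/*
 * The attributes below are exposed under /sys/kernel/mm/ksm/.  An
 * illustrative shell session (example values, not part of this file):
 *
 *	echo 100 > /sys/kernel/mm/ksm/pages_to_scan
 *	echo 20  > /sys/kernel/mm/ksm/sleep_millisecs
 *	echo 1   > /sys/kernel/mm/ksm/run
 *	cat /sys/kernel/mm/ksm/pages_sharing
 */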
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
  #define KSM_ATTR_RO(_name) \
  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KSM_ATTR(_name) \
  	static struct kobj_attribute _name##_attr = \
  		__ATTR(_name, 0644, _name##_show, _name##_store)
  
  static ssize_t sleep_millisecs_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
	return sprintf(buf, "%u\n", ksm_thread_sleep_millisecs);
  }
  
  static ssize_t sleep_millisecs_store(struct kobject *kobj,
  				     struct kobj_attribute *attr,
  				     const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;
3dbb95f78   Jingoo Han   mm: replace stric...
2010
  	err = kstrtoul(buf, 10, &msecs);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_sleep_millisecs = msecs;
  
  	return count;
  }
  KSM_ATTR(sleep_millisecs);
  
  static ssize_t pages_to_scan_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
	return sprintf(buf, "%u\n", ksm_thread_pages_to_scan);
  }
  
  static ssize_t pages_to_scan_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long nr_pages;
3dbb95f78   Jingoo Han   mm: replace stric...
2033
  	err = kstrtoul(buf, 10, &nr_pages);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
  	if (err || nr_pages > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_pages_to_scan = nr_pages;
  
  	return count;
  }
  KSM_ATTR(pages_to_scan);
  
  static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
  			char *buf)
  {
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2046
2047
	return sprintf(buf, "%lu\n", ksm_run);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2048
2049
2050
2051
2052
2053
2054
  }
  
  static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
  			 const char *buf, size_t count)
  {
  	int err;
  	unsigned long flags;
3dbb95f78   Jingoo Han   mm: replace stric...
2055
  	err = kstrtoul(buf, 10, &flags);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2056
2057
2058
2059
2060
2061
2062
2063
  	if (err || flags > UINT_MAX)
  		return -EINVAL;
  	if (flags > KSM_RUN_UNMERGE)
  		return -EINVAL;
  
  	/*
  	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
  	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
d0f209f68   Hugh Dickins   ksm: remove unswa...
2064
2065
  	 * breaking COW to free the pages_shared (but leaves mm_slots
  	 * on the list for when ksmd may be set running again).
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2066
2067
2068
  	 */
  
  	mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2069
  	wait_while_offlining();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2070
2071
  	if (ksm_run != flags) {
  		ksm_run = flags;
d952b7913   Hugh Dickins   ksm: fix endless ...
2072
  		if (flags & KSM_RUN_UNMERGE) {
e1e12d2f3   David Rientjes   mm, oom: fix race...
2073
  			set_current_oom_origin();
d952b7913   Hugh Dickins   ksm: fix endless ...
2074
  			err = unmerge_and_remove_all_rmap_items();
e1e12d2f3   David Rientjes   mm, oom: fix race...
2075
  			clear_current_oom_origin();
d952b7913   Hugh Dickins   ksm: fix endless ...
2076
2077
2078
2079
2080
  			if (err) {
  				ksm_run = KSM_RUN_STOP;
  				count = err;
  			}
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2081
2082
2083
2084
2085
2086
2087
2088
2089
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	if (flags & KSM_RUN_MERGE)
  		wake_up_interruptible(&ksm_thread_wait);
  
  	return count;
  }
  KSM_ATTR(run);
90bd6fd31   Petr Holasek   ksm: allow trees ...
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
  #ifdef CONFIG_NUMA
  static ssize_t merge_across_nodes_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
	return sprintf(buf, "%u\n", ksm_merge_across_nodes);
  }
  
  static ssize_t merge_across_nodes_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long knob;
  
  	err = kstrtoul(buf, 10, &knob);
  	if (err)
  		return err;
  	if (knob > 1)
  		return -EINVAL;
  
  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_merge_across_nodes != knob) {
  		if (ksm_pages_shared || remove_all_stable_nodes())
  			err = -EBUSY;
  		else if (root_stable_tree == one_stable_tree) {
  			struct rb_root *buf;
  			/*
  			 * This is the first time that we switch away from the
  			 * default of merging across nodes: must now allocate
  			 * a buffer to hold as many roots as may be needed.
  			 * Allocate stable and unstable together:
  			 * MAXSMP NODES_SHIFT 10 will use 16kB.
  			 */
  			buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf),
  				      GFP_KERNEL);
			/* Assume RB_ROOT is { NULL }, so the zeroed buffer already holds empty trees */
  			if (!buf)
  				err = -ENOMEM;
  			else {
  				root_stable_tree = buf;
  				root_unstable_tree = buf + nr_node_ids;
  				/* Stable tree is empty but not the unstable */
  				root_unstable_tree[0] = one_unstable_tree[0];
  			}
  		}
  		if (!err) {
  			ksm_merge_across_nodes = knob;
  			ksm_nr_node_ids = knob ? 1 : nr_node_ids;
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	return err ? err : count;
  }
  KSM_ATTR(merge_across_nodes);
  #endif
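
merge_across_nodes_store() above returns -EBUSY while any KSM pages exist, so the knob can only be flipped after everything has been unmerged. One possible sequence from userspace is sketched below (the helper name and the particular ordering are illustrative assumptions):

/*
 * Illustrative userspace sketch, not part of ksm.c: unmerge, flip
 * merge_across_nodes, then restart scanning with per-node trees.
 */
#include <stdio.h>

static int write_knob(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* Writing 2 unmerges everything; otherwise the store returns -EBUSY. */
	if (write_knob("/sys/kernel/mm/ksm/run", "2\n"))
		return 1;
	if (write_knob("/sys/kernel/mm/ksm/merge_across_nodes", "0\n"))
		return 1;
	/* Resume scanning, now with one stable/unstable tree per NUMA node. */
	return write_knob("/sys/kernel/mm/ksm/run", "1\n") ? 1 : 0;
}
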
  static ssize_t pages_shared_show(struct kobject *kobj,
  				 struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_shared);
  }
  KSM_ATTR_RO(pages_shared);
  
  static ssize_t pages_sharing_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_sharing);
  }
  KSM_ATTR_RO(pages_sharing);
  static ssize_t pages_unshared_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_unshared);
  }
  KSM_ATTR_RO(pages_unshared);
  
  static ssize_t pages_volatile_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	long ksm_pages_volatile;
  
  	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
  				- ksm_pages_sharing - ksm_pages_unshared;
  	/*
  	 * It was not worth any locking to calculate that statistic,
  	 * but it might therefore sometimes be negative: conceal that.
  	 */
  	if (ksm_pages_volatile < 0)
  		ksm_pages_volatile = 0;
  	return sprintf(buf, "%ld
  ", ksm_pages_volatile);
  }
  KSM_ATTR_RO(pages_volatile);
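
pages_volatile_show() above derives its value instead of maintaining a counter: any rmap_item not accounted as shared, sharing or unshared is assumed to be volatile, i.e. changing too fast to be worth merging. The same arithmetic is written out below as a standalone helper, purely for clarity (the function name is hypothetical):

/* Hypothetical helper mirroring the derivation above; not part of ksm.c. */
long derive_pages_volatile(long rmap_items, long shared,
			   long sharing, long unshared)
{
	long volatile_pages = rmap_items - shared - sharing - unshared;

	/*
	 * The counters are sampled without locking, so the difference can
	 * briefly go negative; report 0 in that case, as the sysfs file does.
	 */
	return volatile_pages < 0 ? 0 : volatile_pages;
}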
  
  static ssize_t full_scans_show(struct kobject *kobj,
  			       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_scan.seqnr);
  }
  KSM_ATTR_RO(full_scans);
  static struct attribute *ksm_attrs[] = {
  	&sleep_millisecs_attr.attr,
  	&pages_to_scan_attr.attr,
  	&run_attr.attr,
  	&pages_shared_attr.attr,
  	&pages_sharing_attr.attr,
  	&pages_unshared_attr.attr,
  	&pages_volatile_attr.attr,
  	&full_scans_attr.attr,
  #ifdef CONFIG_NUMA
  	&merge_across_nodes_attr.attr,
  #endif
  	NULL,
  };
  
  static struct attribute_group ksm_attr_group = {
  	.attrs = ksm_attrs,
  	.name = "ksm",
  };
  #endif /* CONFIG_SYSFS */
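
For readers of this excerpt: the *_attr names collected in ksm_attrs[] come from the KSM_ATTR()/KSM_ATTR_RO() macros defined earlier in the file, which wrap each show/store pair in a struct kobj_attribute. A rough sketch of what they are expected to look like is given below; the exact definitions live outside this excerpt and may differ.

/*
 * Sketch of the helper macros defined earlier in ksm.c (assumed, not shown here):
 *
 *	#define KSM_ATTR_RO(_name) \
 *		static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 *	#define KSM_ATTR(_name) \
 *		static struct kobj_attribute _name##_attr = \
 *			__ATTR(_name, 0644, _name##_show, _name##_store)
 *
 * sysfs_create_group(mm_kobj, &ksm_attr_group) in ksm_init() then exposes each
 * attribute as /sys/kernel/mm/ksm/<name>.
 */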
  
  static int __init ksm_init(void)
  {
  	struct task_struct *ksm_thread;
  	int err;
  
  	err = ksm_slab_init();
  	if (err)
  		goto out;
  	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
  	if (IS_ERR(ksm_thread)) {
  		pr_err("ksm: creating kthread failed
  ");
  		err = PTR_ERR(ksm_thread);
  		goto out_free;
  	}
  #ifdef CONFIG_SYSFS
  	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
  	if (err) {
  		pr_err("ksm: register sysfs failed
  ");
  		kthread_stop(ksm_thread);
  		goto out_free;
  	}
  #else
  	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */
  #endif /* CONFIG_SYSFS */

  #ifdef CONFIG_MEMORY_HOTREMOVE
  	/* There is no significance to this priority 100 */
  	hotplug_memory_notifier(ksm_memory_callback, 100);
  #endif
  	return 0;
  out_free:
  	ksm_slab_free();
  out:
  	return err;
  }
  subsys_initcall(ksm_init);
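
ksm_init() runs at subsys_initcall time, so the read-only counters registered above are available once userspace is up. A hedged monitoring sketch follows; the helper name and the output format are illustrative assumptions, not part of this file.

/*
 * Illustrative userspace sketch, not part of ksm.c: sample the counters
 * exported above and report how much duplication ksmd has found so far.
 */
#include <stdio.h>

static long read_knob(const char *knob)
{
	char path[128];
	long val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/mm/ksm/%s", knob);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	long shared = read_knob("pages_shared");
	long sharing = read_knob("pages_sharing");
	long scans = read_knob("full_scans");

	if (shared < 0 || sharing < 0 || scans < 0)
		return 1;
	/* pages_sharing / pages_shared approximates how many mappings each
	 * merged KSM page serves on average. */
	printf("full scans: %ld, shared: %ld, sharing: %ld\n",
	       scans, shared, sharing);
	return 0;
}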