Blame view

mm/ksm.c 87 KB
f8af4da3b   Hugh Dickins   ksm: the mm inter...
1
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2
3
4
5
6
   * Memory merging support.
   *
   * This code enables dynamic sharing of identical pages found in different
   * memory areas, even if they are not shared by fork()
   *
36b2528dc   Izik Eidus   ksm: change copyr...
7
   * Copyright (C) 2008-2009 Red Hat, Inc.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
8
9
10
11
   * Authors:
   *	Izik Eidus
   *	Andrea Arcangeli
   *	Chris Wright
36b2528dc   Izik Eidus   ksm: change copyr...
12
   *	Hugh Dickins
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
13
14
   *
   * This work is licensed under the terms of the GNU GPL, version 2.
f8af4da3b   Hugh Dickins   ksm: the mm inter...
15
16
17
   */
  
  #include <linux/errno.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
18
19
  #include <linux/mm.h>
  #include <linux/fs.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
20
  #include <linux/mman.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
21
  #include <linux/sched.h>
6e84f3152   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/mm.h>
f7ccbae45   Ingo Molnar   sched/headers: Pr...
23
  #include <linux/sched/coredump.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
24
25
26
27
28
29
30
31
32
33
  #include <linux/rwsem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/spinlock.h>
  #include <linux/jhash.h>
  #include <linux/delay.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>
  #include <linux/slab.h>
  #include <linux/rbtree.h>
62b61f611   Hugh Dickins   ksm: memory hotre...
34
  #include <linux/memory.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
35
  #include <linux/mmu_notifier.h>
2c6854fda   Izik Eidus   ksm: change defau...
36
  #include <linux/swap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
37
  #include <linux/ksm.h>
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
38
  #include <linux/hashtable.h>
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
39
  #include <linux/freezer.h>
72788c385   David Rientjes   oom: replace PF_O...
40
  #include <linux/oom.h>
90bd6fd31   Petr Holasek   ksm: allow trees ...
41
  #include <linux/numa.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
42

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
43
  #include <asm/tlbflush.h>
73848b468   Hugh Dickins   ksm: fix mlockfre...
44
  #include "internal.h"
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
45

e850dcf53   Hugh Dickins   ksm: trivial tidyups
46
47
48
49
50
51
52
  /*
   * NUMA(x) evaluates to x on CONFIG_NUMA kernels and to the constant 0
   * otherwise.  DO_NUMA(x) runs x as a statement on CONFIG_NUMA kernels and
   * expands to an empty statement otherwise.  In the !CONFIG_NUMA variants
   * the argument is discarded without being evaluated.
   */
  #ifdef CONFIG_NUMA
  #define NUMA(x)		(x)
  #define DO_NUMA(x)	do { (x); } while (0)
  #else
  #define NUMA(x)		(0)
  #define DO_NUMA(x)	do { } while (0)
  #endif
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  /*
   * A few notes about the KSM scanning process,
   * to make it easier to understand the data structures below:
   *
   * In order to reduce excessive scanning, KSM sorts the memory pages by their
   * contents into a data structure that holds pointers to the pages' locations.
   *
   * Since the contents of the pages may change at any moment, KSM cannot just
   * insert the pages into a normal sorted tree and expect it to find anything.
   * Therefore KSM uses two data structures - the stable and the unstable tree.
   *
   * The stable tree holds pointers to all the merged pages (ksm pages), sorted
   * by their contents.  Because each such page is write-protected, searching on
   * this tree is fully assured to be working (except when pages are unmapped),
   * and therefore this tree is called the stable tree.
   *
   * In addition to the stable tree, KSM uses a second data structure called the
   * unstable tree: this tree holds pointers to pages which have been found to
   * be "unchanged for a period of time".  The unstable tree sorts these pages
   * by their contents, but since they are not write-protected, KSM cannot rely
   * upon the unstable tree to work correctly - the unstable tree is liable to
   * be corrupted as its contents are modified, and so it is called unstable.
   *
   * KSM solves this problem by several techniques:
   *
   * 1) The unstable tree is flushed every time KSM completes scanning all
   *    memory areas, and then the tree is rebuilt again from the beginning.
   * 2) KSM will only insert into the unstable tree, pages whose hash value
   *    has not changed since the previous scan of all memory areas.
   * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the
   *    colors of the nodes and not on their contents, assuring that even when
   *    the tree gets "corrupted" it won't get out of balance, so scanning time
   *    remains the same (also, searching and inserting nodes in an rbtree uses
   *    the same algorithm, so we have no overhead when we flush and rebuild).
   * 4) KSM never flushes the stable tree, which means that even if it were to
   *    take 10 attempts to find a page in the unstable tree, once it is found,
   *    it is secured in the stable tree.  (When we scan a new page, we first
   *    compare it against the stable tree, and then against the unstable tree.)
8fdb3dbf0   Hugh Dickins   ksm: add some com...
91
92
93
   *
   * If the merge_across_nodes tunable is unset, then KSM maintains multiple
   * stable trees and multiple unstable trees: one of each for each NUMA node.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
94
95
96
97
98
99
   */
  
  /**
   * struct mm_slot - ksm information per mm that is being scanned
   * @link: link to the mm_slots hash list
   * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
6514d511d   Hugh Dickins   ksm: singly-linke...
100
   * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
101
102
103
104
105
   * @mm: the mm that this information is valid for
   */
  struct mm_slot {
  	struct hlist_node link;		/* hashed into mm_slots_hash, keyed by the mm pointer */
  	struct list_head mm_list;	/* ksm_mm_head.mm_list is the root of this list */
6514d511d   Hugh Dickins   ksm: singly-linke...
106
  	struct rmap_item *rmap_list;	/* chained through rmap_item->rmap_list */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
107
108
109
110
111
112
113
  	struct mm_struct *mm;		/* assigned in insert_to_mm_slots_hash() */
  };
  
  /**
   * struct ksm_scan - cursor for scanning
   * @mm_slot: the current mm_slot we are scanning
   * @address: the next address inside that to be scanned
6514d511d   Hugh Dickins   ksm: singly-linke...
114
   * @rmap_list: link to the next rmap to be scanned in the rmap_list
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
115
116
117
118
119
120
121
   * @seqnr: count of completed full scans (needed when removing unstable node)
   *
   * There is only the one ksm_scan instance of this cursor structure.
   */
  struct ksm_scan {
  	struct mm_slot *mm_slot;	/* the single instance starts at &ksm_mm_head */
  	unsigned long address;		/* next address to scan within mm_slot */
6514d511d   Hugh Dickins   ksm: singly-linke...
122
  	struct rmap_item **rmap_list;	/* points at a link field, not at an item */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
123
124
125
126
  	unsigned long seqnr;		/* count of completed full scans */
  };
  
  /**
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
127
128
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
4146d2d67   Hugh Dickins   ksm: make !merge_...
129
   * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
130
   * @hlist_dup: linked into the stable_node->hlist with a stable_node chain
4146d2d67   Hugh Dickins   ksm: make !merge_...
131
   * @list: linked into migrate_nodes, pending placement in the proper node tree
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
132
   * @hlist: hlist head of rmap_items using this ksm page
4146d2d67   Hugh Dickins   ksm: make !merge_...
133
   * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
134
135
   * @chain_prune_time: time of the last full garbage collection
   * @rmap_hlist_len: number of rmap_item entries in hlist or STABLE_NODE_CHAIN
4146d2d67   Hugh Dickins   ksm: make !merge_...
136
   * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
137
138
   */
  struct stable_node {
4146d2d67   Hugh Dickins   ksm: make !merge_...
139
140
141
142
  	/*
  	 * The rb_node overlays the head/hlist_dup/list group: a stable_node
  	 * is either linked in an rbtree, or identified through ->head as
  	 * being on migrate_nodes (head == &migrate_nodes) or as a dup inside
  	 * a chain (head == STABLE_NODE_DUP_HEAD).
  	 */
  	union {
  		struct rb_node node;	/* when node of stable tree */
  		struct {		/* when listed for migration */
  			struct list_head *head;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
143
144
145
146
  			struct {
  				/* link into chain->hlist, see stable_node_chain_add_dup() */
  				struct hlist_node hlist_dup;
  				/* link into migrate_nodes */
  				struct list_head list;
  			};
4146d2d67   Hugh Dickins   ksm: make !merge_...
147
148
  		};
  	};
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
149
  	/* rmap_items sharing this page; for a chain, the hlist of its dups */
  	struct hlist_head hlist;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
150
151
152
153
154
155
156
157
158
159
160
  	/* chain_prune_time is what alloc_stable_node_chain() sets for a chain */
  	union {
  		unsigned long kpfn;
  		unsigned long chain_prune_time;
  	};
  	/*
  	 * STABLE_NODE_CHAIN can be any negative number in
  	 * rmap_hlist_len negative range, but better not -1 to be able
  	 * to reliably detect underflows.
  	 */
  #define STABLE_NODE_CHAIN -1024
  	/* equals STABLE_NODE_CHAIN for a chain, see is_stable_node_chain() */
  	int rmap_hlist_len;
4146d2d67   Hugh Dickins   ksm: make !merge_...
161
162
163
  #ifdef CONFIG_NUMA
  	/* index of the stable tree root used by stable_node_dup_del() */
  	int nid;
  #endif
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
164
165
166
  };
  
  /**
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
167
   * struct rmap_item - reverse mapping item for virtual addresses
6514d511d   Hugh Dickins   ksm: singly-linke...
168
   * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
db114b83a   Hugh Dickins   ksm: hold anon_vm...
169
   * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
170
   * @nid: NUMA node id of unstable tree in which linked (may not match page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
171
172
173
   * @mm: the memory structure this rmap_item is pointing into
   * @address: the virtual address this rmap_item tracks (+ flags in low bits)
   * @oldchecksum: previous checksum of the page at that virtual address
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
174
175
176
   * @node: rb node of this rmap_item in the unstable tree
   * @head: pointer to stable_node heading this list in the stable tree
   * @hlist: link into hlist of rmap_items hanging off that stable_node
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
177
178
   */
  struct rmap_item {
6514d511d   Hugh Dickins   ksm: singly-linke...
179
  	/* next item on the owning mm_slot's singly-linked list */
  	struct rmap_item *rmap_list;
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
180
181
182
183
184
185
  	/* anon_vma and nid share storage: only one is meaningful at a time */
  	union {
  		struct anon_vma *anon_vma;	/* when stable */
  #ifdef CONFIG_NUMA
  		int nid;		/* when node of unstable tree */
  #endif
  	};
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
186
187
  	struct mm_struct *mm;		/* cleared by free_rmap_item() before freeing */
  	unsigned long address;		/* + low bits used for flags below */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
188
  	unsigned int oldchecksum;	/* when unstable */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
189
  	/* unstable-tree linkage overlays the stable-tree linkage */
  	union {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
190
191
192
193
194
  		struct rb_node node;	/* when node of unstable tree */
  		struct {		/* when listed from stable tree */
  			struct stable_node *head;
  			struct hlist_node hlist;
  		};
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
195
196
197
198
  	};
  };
  
  /*
   * Flag bits kept in the low bits of rmap_item->address.  They are wiped
   * with "address &= PAGE_MASK" in remove_node_from_stable_tree().
   */
  #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
199
200
  #define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
  #define STABLE_FLAG	0x200	/* is listed from the stable tree */
6f2302848   Jia He   mm/ksm.c: ignore ...
201
202
  #define KSM_FLAG_MASK	(SEQNR_MASK|UNSTABLE_FLAG|STABLE_FLAG)
  				/* to mask all the flags */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
203
204
  
  /* The stable and unstable tree heads */
ef53d16cd   Hugh Dickins   ksm: allocate roo...
205
206
207
208
  /*
   * One statically allocated root of each kind is the default.  The
   * root_*_tree pointers are used as arrays indexed by node id, e.g.
   * "root_stable_tree + NUMA(dup->nid)" in stable_node_dup_del().
   */
  static struct rb_root one_stable_tree[1] = { RB_ROOT };
  static struct rb_root one_unstable_tree[1] = { RB_ROOT };
  static struct rb_root *root_stable_tree = one_stable_tree;
  static struct rb_root *root_unstable_tree = one_unstable_tree;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
209

4146d2d67   Hugh Dickins   ksm: make !merge_...
210
211
  /* Recently migrated nodes of stable tree, pending proper placement */
  static LIST_HEAD(migrate_nodes);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
212
  /*
   * Sentinel stored in stable_node->head to mark a dup that sits inside a
   * stable_node chain (see is_stable_node_dup()).  It is the address of
   * migrate_nodes.prev, so it lies inside migrate_nodes yet differs from
   * &migrate_nodes itself.
   */
  #define STABLE_NODE_DUP_HEAD ((struct list_head *)&migrate_nodes.prev)
4146d2d67   Hugh Dickins   ksm: make !merge_...
213

4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
214
215
  /*
   * Hash table of mm_slots: entries are added by insert_to_mm_slots_hash()
   * and looked up by get_mm_slot(), both keyed by the mm pointer value.
   */
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
216
217
218
219
220
221
222
223
224
  
  /* List head for all mm_slots; its mm_list starts out pointing at itself */
  static struct mm_slot ksm_mm_head = {
  	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
  };
  /* The one scan cursor, initially parked on the list head */
  static struct ksm_scan ksm_scan = {
  	.mm_slot = &ksm_mm_head,
  };
  
  /* Slab caches, created in ksm_slab_init() and destroyed in ksm_slab_free() */
  static struct kmem_cache *rmap_item_cache;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
225
  static struct kmem_cache *stable_node_cache;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
226
227
228
  /* NULL after ksm_slab_free(); alloc_mm_slot() treats that as "init failed" */
  static struct kmem_cache *mm_slot_cache;
  
  /* The number of nodes in the stable tree */
b40282603   Hugh Dickins   ksm: rename kerne...
229
  /*
   * Decremented in remove_node_from_stable_tree() for the rmap_item that has
   * no successor in the stable_node's hlist.
   */
  static unsigned long ksm_pages_shared;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
230

e178dfde3   Hugh Dickins   ksm: move pages_s...
231
  /* The number of page slots additionally sharing those nodes */
b40282603   Hugh Dickins   ksm: rename kerne...
232
  /*
   * Decremented in remove_node_from_stable_tree() for every rmap_item that
   * still has a successor in the stable_node's hlist.
   */
  static unsigned long ksm_pages_sharing;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
233

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
234
235
236
237
238
  /* The number of nodes in the unstable tree */
  static unsigned long ksm_pages_unshared;
  
  /*
   * The number of rmap_items in use: to calculate pages_volatile.
   * Incremented by alloc_rmap_item() on success, decremented by
   * free_rmap_item().
   */
  static unsigned long ksm_rmap_items;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
239
240
241
242
243
244
245
246
247
248
249
  /*
   * The number of stable_node chains: incremented in
   * alloc_stable_node_chain(), decremented in free_stable_node_chain().
   */
  static unsigned long ksm_stable_node_chains;
  
  /*
   * The number of stable_node dups linked to the stable_node chains:
   * incremented in stable_node_chain_add_dup(), decremented in
   * __stable_node_dup_del().
   */
  static unsigned long ksm_stable_node_dups;
  
  /* Delay in pruning stale stable_node_dups in the stable_node_chains */
  static int ksm_stable_node_chains_prune_millisecs = 2000;
  
  /* Maximum number of page slots sharing a stable node (default 256) */
  static int ksm_max_page_sharing = 256;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
250
  /* Number of pages ksmd should scan in one batch */
2c6854fda   Izik Eidus   ksm: change defau...
251
  /* Batch size in pages; the built-in default is 100 */
  static unsigned int ksm_thread_pages_to_scan = 100;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
252
253
  
  /* Milliseconds ksmd should sleep between batches */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
254
  /* Sleep time in milliseconds; the built-in default is 20 */
  static unsigned int ksm_thread_sleep_millisecs = 20;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
255

e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
256
257
258
259
260
  /* Checksum of an empty (zeroed) page; no initializer, so it starts as 0 */
  static unsigned int zero_checksum __read_mostly;
  
  /*
   * Whether to merge empty (zeroed) pages with actual zero pages; no
   * initializer, so it starts as false.
   */
  static bool ksm_use_zero_pages __read_mostly;
e850dcf53   Hugh Dickins   ksm: trivial tidyups
261
  /*
   * With CONFIG_NUMA these are real variables; without it they collapse to
   * the constants 1U and 1, so get_kpfn_nid() always picks index 0.
   */
  #ifdef CONFIG_NUMA
90bd6fd31   Petr Holasek   ksm: allow trees ...
262
263
  /* Zeroed when merging across nodes is not allowed */
  static unsigned int ksm_merge_across_nodes = 1;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
264
  static int ksm_nr_node_ids = 1;
e850dcf53   Hugh Dickins   ksm: trivial tidyups
265
266
  #else
  #define ksm_merge_across_nodes	1U
ef53d16cd   Hugh Dickins   ksm: allocate roo...
267
  #define ksm_nr_node_ids		1
e850dcf53   Hugh Dickins   ksm: trivial tidyups
268
  #endif
90bd6fd31   Petr Holasek   ksm: allow trees ...
269

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
270
271
272
  /*
   * Values stored in ksm_run, which starts out as KSM_RUN_STOP.
   * NOTE(review): 1, 2 and 4 look like distinct bits, which would let
   * KSM_RUN_OFFLINE be combined with another state - confirm against the
   * code that updates ksm_run.
   */
  #define KSM_RUN_STOP	0
  #define KSM_RUN_MERGE	1
  #define KSM_RUN_UNMERGE	2
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
273
274
275
  #define KSM_RUN_OFFLINE	4
  static unsigned long ksm_run = KSM_RUN_STOP;
  /* Forward declaration; the definition is not in this part of the file */
  static void wait_while_offlining(void);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
276
277
278
279
280
281
282
283
284
285
286
287
288
289
  
  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
  static DEFINE_MUTEX(ksm_thread_mutex);
  static DEFINE_SPINLOCK(ksm_mmlist_lock);
  
  /*
   * Create a slab cache for "struct __struct".  The cache is named
   * "ksm_<__struct>", uses the struct's own size and alignment, takes
   * __flags as creation flags and has no constructor (NULL).
   */
  #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
  		sizeof(struct __struct), __alignof__(struct __struct),\
  		(__flags), NULL)
  
  /*
   * Create the three slab caches used by KSM (rmap_item, stable_node and
   * mm_slot).  Returns 0 on success.  If any creation fails, the caches
   * created so far are destroyed again and -ENOMEM is returned.
   */
  static int __init ksm_slab_init(void)
  {
  	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
  	if (rmap_item_cache == NULL)
  		return -ENOMEM;
  
  	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
  	if (stable_node_cache == NULL)
  		goto err_rmap_item;
  
  	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
  	if (mm_slot_cache == NULL)
  		goto err_stable_node;
  
  	return 0;
  
  	/* Unwind in reverse order of creation */
  err_stable_node:
  	kmem_cache_destroy(stable_node_cache);
  err_rmap_item:
  	kmem_cache_destroy(rmap_item_cache);
  	return -ENOMEM;
  }
  
  /*
   * Destroy all three KSM slab caches.  mm_slot_cache is additionally reset
   * to NULL, which is what alloc_mm_slot() checks to detect a failed
   * initialization.
   */
  static void __init ksm_slab_free(void)
  {
  	kmem_cache_destroy(mm_slot_cache);
  	mm_slot_cache = NULL;
  
  	kmem_cache_destroy(stable_node_cache);
  	kmem_cache_destroy(rmap_item_cache);
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
  /*
   * A stable_node is a chain head when its rmap_hlist_len carries the
   * STABLE_NODE_CHAIN marker value.
   */
  static __always_inline bool is_stable_node_chain(struct stable_node *chain)
  {
  	const int len = chain->rmap_hlist_len;
  
  	return len == STABLE_NODE_CHAIN;
  }
  
  /*
   * A stable_node is a dup (member of a chain) when its head pointer holds
   * the STABLE_NODE_DUP_HEAD sentinel.
   */
  static __always_inline bool is_stable_node_dup(struct stable_node *dup)
  {
  	const struct list_head *owner = dup->head;
  
  	return owner == STABLE_NODE_DUP_HEAD;
  }
  
  /*
   * Link @dup into @chain's hlist of duplicates and account for it.
   * @dup must not already be a dup, and @chain must be a chain head; both
   * conditions are asserted with VM_BUG_ON.
   */
  static inline void stable_node_chain_add_dup(struct stable_node *dup,
  					     struct stable_node *chain)
  {
  	VM_BUG_ON(is_stable_node_dup(dup));
  	VM_BUG_ON(!is_stable_node_chain(chain));
  
  	/* Mark as dup, then hang it off the chain */
  	dup->head = STABLE_NODE_DUP_HEAD;
  	hlist_add_head(&dup->hlist_dup, &chain->hlist);
  
  	ksm_stable_node_dups++;
  }
  
  static inline void __stable_node_dup_del(struct stable_node *dup)
  {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
335
  	VM_BUG_ON(!is_stable_node_dup(dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
  	hlist_del(&dup->hlist_dup);
  	ksm_stable_node_dups--;
  }
  
  /*
   * Remove a non-chain stable_node from wherever it is linked: from its
   * chain's hlist when it is a dup, otherwise from the stable rbtree of its
   * node id.  With CONFIG_DEBUG_VM the head pointer is cleared afterwards.
   */
  static inline void stable_node_dup_del(struct stable_node *dup)
  {
  	VM_BUG_ON(is_stable_node_chain(dup));
  
  	if (!is_stable_node_dup(dup))
  		rb_erase(&dup->node, &root_stable_tree[NUMA(dup->nid)]);
  	else
  		__stable_node_dup_del(dup);
  
  #ifdef CONFIG_DEBUG_VM
  	dup->head = NULL;
  #endif
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
351
352
  static inline struct rmap_item *alloc_rmap_item(void)
  {
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
353
  	struct rmap_item *rmap_item;
5b398e416   zhong jiang   mm,ksm: fix endle...
354
355
  	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
  						__GFP_NORETRY | __GFP_NOWARN);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
356
357
358
  	if (rmap_item)
  		ksm_rmap_items++;
  	return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
359
360
361
362
  }
  
  static inline void free_rmap_item(struct rmap_item *rmap_item)
  {
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
363
  	ksm_rmap_items--;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
364
365
366
  	rmap_item->mm = NULL;	/* debug safety */
  	kmem_cache_free(rmap_item_cache, rmap_item);
  }
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
367
368
  static inline struct stable_node *alloc_stable_node(void)
  {
6213055f2   zhong jiang   mm,ksm: add __GFP...
369
370
371
372
373
374
  	/*
  	 * The allocation can take too long with GFP_KERNEL when memory is under
  	 * pressure, which may lead to hung task warnings.  Adding __GFP_HIGH
  	 * grants access to memory reserves, helping to avoid this problem.
  	 */
  	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
375
376
377
378
  }
  
  static inline void free_stable_node(struct stable_node *stable_node)
  {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
379
380
  	VM_BUG_ON(stable_node->rmap_hlist_len &&
  		  !is_stable_node_chain(stable_node));
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
381
382
  	kmem_cache_free(stable_node_cache, stable_node);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
383
384
385
386
387
388
389
390
391
392
393
  /*
   * Allocate a zeroed mm_slot.  Returns NULL either when the allocation
   * fails or when mm_slot_cache was never set up (initialization failed).
   */
  static inline struct mm_slot *alloc_mm_slot(void)
  {
  	return mm_slot_cache ?
  		kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL) : NULL;
  }
  
  /* Return @mm_slot to mm_slot_cache. */
  static inline void free_mm_slot(struct mm_slot *mm_slot)
  {
  	kmem_cache_free(mm_slot_cache, mm_slot);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
394
395
  static struct mm_slot *get_mm_slot(struct mm_struct *mm)
  {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
396
  	struct mm_slot *slot;
b67bfe0d4   Sasha Levin   hlist: drop the n...
397
  	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
398
399
  		if (slot->mm == mm)
  			return slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
400

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
401
402
403
404
405
406
  	return NULL;
  }
  
  /*
   * Bind @mm_slot to @mm and publish it in mm_slots_hash, using the mm
   * pointer value as the hash key (the same key get_mm_slot() looks up).
   */
  static void insert_to_mm_slots_hash(struct mm_struct *mm,
  				    struct mm_slot *mm_slot)
  {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
407
  	/* record the owner before the slot becomes reachable via the hash */
  	mm_slot->mm = mm;
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
408
  	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
409
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
410
  /*
a913e182a   Hugh Dickins   ksm: clean up obs...
411
412
413
414
415
416
417
418
419
420
421
422
423
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
   * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
   * a special flag: they can just back out as soon as mm_users goes to zero.
   * ksm_test_exit() is used throughout to make this test for exit: in some
   * places for correctness, in some places just to avoid unnecessary work.
   */
  /* True once @mm has no users left, i.e. mm_users has dropped to zero. */
  static inline bool ksm_test_exit(struct mm_struct *mm)
  {
  	return !atomic_read(&mm->mm_users);
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
424
425
   * We use break_ksm to break COW on a ksm page: it's a stripped down
   *
d4edcf0d5   Dave Hansen   mm/gup: Switch al...
426
   *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
427
428
429
430
431
432
   *		put_page(page);
   *
   * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
   * in case the application has unmapped and remapped mm,addr meanwhile.
   * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
   * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
1b2ee1266   Dave Hansen   mm/core: Do not e...
433
434
435
436
   *
   * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
   * of the process that owns 'vma'.  We also do not want to enforce
   * protection keys here anyway.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
437
   */
d952b7913   Hugh Dickins   ksm: fix endless ...
438
  static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
439
440
  {
  	struct page *page;
d952b7913   Hugh Dickins   ksm: fix endless ...
441
  	int ret = 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
442
443
444
  
  	do {
  		cond_resched();
1b2ee1266   Dave Hansen   mm/core: Do not e...
445
446
  		page = follow_page(vma, addr,
  				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
22eccdd7d   Dan Carpenter   ksm: check for ER...
447
  		if (IS_ERR_OR_NULL(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
448
449
  			break;
  		if (PageKsm(page))
dcddffd41   Kirill A. Shutemov   mm: do not pass m...
450
451
  			ret = handle_mm_fault(vma, addr,
  					FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
452
453
454
  		else
  			ret = VM_FAULT_WRITE;
  		put_page(page);
33692f275   Linus Torvalds   vm: add VM_FAULT_...
455
  	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
d952b7913   Hugh Dickins   ksm: fix endless ...
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
  	/*
  	 * We must loop because handle_mm_fault() may back out if there's
  	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
  	 *
  	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
  	 * COW has been broken, even if the vma does not permit VM_WRITE;
  	 * but note that a concurrent fault might break PageKsm for us.
  	 *
  	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
  	 * backing file, which also invalidates anonymous pages: that's
  	 * okay, that truncation will have unmapped the PageKsm for us.
  	 *
  	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
  	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
  	 * current task has TIF_MEMDIE set, and will be OOM killed on return
  	 * to user; and ksmd, having no mm, would never be chosen for that.
  	 *
  	 * But if the mm is in a limited mem_cgroup, then the fault may fail
  	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
  	 * even ksmd can fail in this way - though it's usually breaking ksm
  	 * just to undo a merge it made a moment before, so unlikely to oom.
  	 *
  	 * That's a pity: we might therefore have more kernel pages allocated
  	 * than we're counting as nodes in the stable tree; but ksm_do_scan
  	 * will retry to break_cow on each pass, so should recover the page
  	 * in due course.  The important thing is to not let VM_MERGEABLE
  	 * be cleared while any such pages might remain in the area.
  	 */
  	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
485
  }
ef6942224   Bob Liu   ksm: cleanup: int...
486
487
488
489
490
491
492
493
494
495
496
497
498
  /*
   * Find the vma of @mm that contains @addr, provided the mm is not exiting
   * and the vma is VM_MERGEABLE with an anon_vma attached.
   * Returns the vma, or NULL when any of those conditions does not hold.
   */
  static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
  		unsigned long addr)
  {
  	struct vm_area_struct *vma;
  
  	if (ksm_test_exit(mm))
  		return NULL;
  
  	vma = find_vma(mm, addr);
  	/* The vma found may start above addr, so check the start as well */
  	if (vma && vma->vm_start <= addr &&
  	    (vma->vm_flags & VM_MERGEABLE) && vma->anon_vma)
  		return vma;
  
  	return NULL;
  }
8dd3557a5   Hugh Dickins   ksm: cleanup some...
499
  static void break_cow(struct rmap_item *rmap_item)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
500
  {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
501
502
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
503
  	struct vm_area_struct *vma;
4035c07a8   Hugh Dickins   ksm: take keyhole...
504
505
506
507
  	/*
  	 * It is not an accident that whenever we want to break COW
  	 * to undo, we also need to drop a reference to the anon_vma.
  	 */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
508
  	put_anon_vma(rmap_item->anon_vma);
4035c07a8   Hugh Dickins   ksm: take keyhole...
509

81464e306   Hugh Dickins   ksm: five little ...
510
  	down_read(&mm->mmap_sem);
ef6942224   Bob Liu   ksm: cleanup: int...
511
512
513
  	vma = find_mergeable_vma(mm, addr);
  	if (vma)
  		break_ksm(vma, addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
514
515
516
517
518
519
520
521
522
523
524
  	up_read(&mm->mmap_sem);
  }
  
  static struct page *get_mergeable_page(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	struct page *page;
  
  	down_read(&mm->mmap_sem);
ef6942224   Bob Liu   ksm: cleanup: int...
525
526
  	vma = find_mergeable_vma(mm, addr);
  	if (!vma)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
527
528
529
  		goto out;
  
  	page = follow_page(vma, addr, FOLL_GET);
22eccdd7d   Dan Carpenter   ksm: check for ER...
530
  	if (IS_ERR_OR_NULL(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
531
  		goto out;
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
532
  	if (PageAnon(page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
533
534
535
536
  		flush_anon_page(vma, page, addr);
  		flush_dcache_page(page);
  	} else {
  		put_page(page);
c8f95ed1a   Andrea Arcangeli   ksm: unstable_tre...
537
538
  out:
  		page = NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
539
540
541
542
  	}
  	up_read(&mm->mmap_sem);
  	return page;
  }
90bd6fd31   Petr Holasek   ksm: allow trees ...
543
544
545
546
547
548
549
550
  /*
   * This helper is used for getting right index into array of tree roots.
   * When merge_across_nodes knob is set to 1, there are only two rb-trees for
   * stable and unstable pages from all nodes with roots in index 0. Otherwise,
   * every node has its own stable and unstable tree.
   */
  static inline int get_kpfn_nid(unsigned long kpfn)
  {
d8fc16a82   Hugh Dickins   ksm: fix m68k bui...
551
  	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
90bd6fd31   Petr Holasek   ksm: allow trees ...
552
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
  static struct stable_node *alloc_stable_node_chain(struct stable_node *dup,
  						   struct rb_root *root)
  {
  	struct stable_node *chain = alloc_stable_node();
  	VM_BUG_ON(is_stable_node_chain(dup));
  	if (likely(chain)) {
  		INIT_HLIST_HEAD(&chain->hlist);
  		chain->chain_prune_time = jiffies;
  		chain->rmap_hlist_len = STABLE_NODE_CHAIN;
  #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA)
  		chain->nid = -1; /* debug */
  #endif
  		ksm_stable_node_chains++;
  
  		/*
  		 * Put the stable node chain in the first dimension of
  		 * the stable tree and at the same time remove the old
  		 * stable node.
  		 */
  		rb_replace_node(&dup->node, &chain->node, root);
  
  		/*
  		 * Move the old stable node to the second dimension
  		 * queued in the hlist_dup. The invariant is that all
  		 * dup stable_nodes in the chain->hlist point to pages
  		 * that are wrprotected and have the exact same
  		 * content.
  		 */
  		stable_node_chain_add_dup(dup, chain);
  	}
  	return chain;
  }
  
  /*
   * Erase @chain from the rbtree @root, drop it from the chain count and
   * free it.
   */
  static inline void free_stable_node_chain(struct stable_node *chain,
  					  struct rb_root *root)
  {
  	rb_erase(&chain->node, root);
  	ksm_stable_node_chains--;
  
  	free_stable_node(chain);
  }
4035c07a8   Hugh Dickins   ksm: take keyhole...
593
594
595
  static void remove_node_from_stable_tree(struct stable_node *stable_node)
  {
  	struct rmap_item *rmap_item;
4035c07a8   Hugh Dickins   ksm: take keyhole...
596

2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
597
598
  	/* check it's not STABLE_NODE_CHAIN or negative */
  	BUG_ON(stable_node->rmap_hlist_len < 0);
b67bfe0d4   Sasha Levin   hlist: drop the n...
599
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
600
601
602
603
  		if (rmap_item->hlist.next)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
604
605
  		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
  		stable_node->rmap_hlist_len--;
9e60109f1   Peter Zijlstra   mm: rename drop_a...
606
  		put_anon_vma(rmap_item->anon_vma);
4035c07a8   Hugh Dickins   ksm: take keyhole...
607
608
609
  		rmap_item->address &= PAGE_MASK;
  		cond_resched();
  	}
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
610
611
612
613
614
615
616
617
618
619
620
  	/*
  	 * We need the second aligned pointer of the migrate_nodes
  	 * list_head to stay clear from the rb_parent_color union
  	 * (aligned and different than any node) and also different
  	 * from &migrate_nodes. This will verify that future list.h changes
  	 * don't break STABLE_NODE_DUP_HEAD.
  	 */
  #if GCC_VERSION >= 40903 /* only recent gcc can handle it */
  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
  #endif
4146d2d67   Hugh Dickins   ksm: make !merge_...
621
622
623
  	if (stable_node->head == &migrate_nodes)
  		list_del(&stable_node->list);
  	else
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
624
  		stable_node_dup_del(stable_node);
4035c07a8   Hugh Dickins   ksm: take keyhole...
625
626
627
628
629
630
631
632
633
  	free_stable_node(stable_node);
  }
  
  /*
   * get_ksm_page: checks if the page indicated by the stable node
   * is still its ksm page, despite having held no reference to it.
   * In which case we can trust the content of the page, and it
   * returns the gotten page; but if the page has now been zapped,
   * remove the stale node from the stable tree and return NULL.
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
634
   * But beware, the stable node's page might be being migrated.
4035c07a8   Hugh Dickins   ksm: take keyhole...
635
636
637
638
639
640
641
642
643
644
   *
   * You would expect the stable_node to hold a reference to the ksm page.
   * But if it increments the page's count, swapping out has to wait for
   * ksmd to come around again before it can free the page, which may take
   * seconds or even minutes: much too unresponsive.  So instead we use a
   * "keyhole reference": access to the ksm page from the stable node peeps
   * out through its keyhole to see if that page still holds the right key,
   * pointing back to this stable node.  This relies on freeing a PageAnon
   * page to reset its page->mapping to NULL, and relies on no other use of
   * a page to put something that might look like our key in page->mapping.
4035c07a8   Hugh Dickins   ksm: take keyhole...
645
646
   * Beware: the page seen through the keyhole may already be on its way to
   * being freed; that is an anomaly to bear in mind.
   */
8fdb3dbf0   Hugh Dickins   ksm: add some com...
647
  /*
   * @stable_node: node whose ksm page is wanted
   * @lock_it:     when true, a non-NULL page is returned locked, after its
   *               mapping has been re-checked under the page lock
   *
   * A non-NULL return carries a page reference taken here.  A NULL return
   * means the node was found stale and has already been removed and freed.
   */
  static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
  {
  	struct page *page;
  	void *key;
  	unsigned long pfn;
  
  	/* The value page->mapping must hold while the page belongs to us. */
  	key = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
  
  retry:
  	pfn = READ_ONCE(stable_node->kpfn);
  	page = pfn_to_page(pfn);
  
  	/*
  	 * page is computed from kpfn, so on most architectures reading
  	 * page->mapping is naturally ordered after reading node->kpfn,
  	 * but on Alpha we need to be more careful.
  	 */
  	smp_read_barrier_depends();
  	if (READ_ONCE(page->mapping) != key)
  		goto stale_node;
  
  	/*
  	 * We cannot do anything with the page while its refcount is 0.
  	 * Usually 0 means free, or tail of a higher-order page: in which
  	 * case this node is no longer referenced, and should be freed;
  	 * however, it might mean that the page is under page_freeze_refs().
  	 * The __remove_mapping() case is easy, again the node is now stale;
  	 * but if page is swapcache in migrate_page_move_mapping(), it might
  	 * still be our page, in which case it's essential to keep the node.
  	 */
  	for (;;) {
  		if (get_page_unless_zero(page))
  			break;
  		/*
  		 * Another check for page->mapping != expected_mapping would
  		 * work here too.  We have chosen the !PageSwapCache test to
  		 * optimize the common case, when the page is or is about to
  		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
  		 * in the freeze_refs section of __remove_mapping(); but Anon
  		 * page->mapping reset to NULL later, in free_pages_prepare().
  		 */
  		if (!PageSwapCache(page))
  			goto stale_node;
  		cpu_relax();
  	}
  
  	/* Now that we hold a reference, make sure the key is still there. */
  	if (READ_ONCE(page->mapping) != key) {
  		put_page(page);
  		goto stale_node;
  	}
  
  	if (!lock_it)
  		return page;
  
  	/* Final check under the page lock before handing the page out. */
  	lock_page(page);
  	if (READ_ONCE(page->mapping) == key)
  		return page;
  
  	unlock_page(page);
  	put_page(page);
  
  stale_node:
  	/*
  	 * We come here from above when page->mapping or !PageSwapCache
  	 * suggests that the node is stale; but it might be under migration.
  	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
  	 * before checking whether node->kpfn has been changed.
  	 */
  	smp_rmb();
  	if (READ_ONCE(stable_node->kpfn) != pfn)
  		goto retry;
  
  	remove_node_from_stable_tree(stable_node);
  	return NULL;
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
718
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
719
720
721
722
723
   * Removing rmap_item from stable or unstable tree.
   * This function will clean the information from the stable/unstable tree.
   */
  static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
  {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
724
725
  	if (rmap_item->address & STABLE_FLAG) {
  		struct stable_node *stable_node;
5ad646880   Hugh Dickins   ksm: let shared p...
726
  		struct page *page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
727

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
728
  		stable_node = rmap_item->head;
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
729
  		page = get_ksm_page(stable_node, true);
4035c07a8   Hugh Dickins   ksm: take keyhole...
730
731
  		if (!page)
  			goto out;
5ad646880   Hugh Dickins   ksm: let shared p...
732

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
733
  		hlist_del(&rmap_item->hlist);
4035c07a8   Hugh Dickins   ksm: take keyhole...
734
735
  		unlock_page(page);
  		put_page(page);
08beca44d   Hugh Dickins   ksm: stable_node ...
736

98666f8a2   Andrea Arcangeli   ksm: use the help...
737
  		if (!hlist_empty(&stable_node->hlist))
4035c07a8   Hugh Dickins   ksm: take keyhole...
738
739
  			ksm_pages_sharing--;
  		else
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
740
  			ksm_pages_shared--;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
741
742
  		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
  		stable_node->rmap_hlist_len--;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
743

9e60109f1   Peter Zijlstra   mm: rename drop_a...
744
  		put_anon_vma(rmap_item->anon_vma);
93d17715a   Hugh Dickins   ksm: three remove...
745
  		rmap_item->address &= PAGE_MASK;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
746

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
747
  	} else if (rmap_item->address & UNSTABLE_FLAG) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
748
749
  		unsigned char age;
  		/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
750
  		 * Usually ksmd can and must skip the rb_erase, because
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
751
  		 * root_unstable_tree was already reset to RB_ROOT.
9ba692948   Hugh Dickins   ksm: fix oom dead...
752
753
754
  		 * But be careful when an mm is exiting: do the rb_erase
  		 * if this rmap_item was inserted by this scan, rather
  		 * than left over from before.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
755
756
  		 */
  		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
cd551f975   Hugh Dickins   ksm: distribute r...
757
  		BUG_ON(age > 1);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
758
  		if (!age)
90bd6fd31   Petr Holasek   ksm: allow trees ...
759
  			rb_erase(&rmap_item->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
760
  				 root_unstable_tree + NUMA(rmap_item->nid));
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
761
  		ksm_pages_unshared--;
93d17715a   Hugh Dickins   ksm: three remove...
762
  		rmap_item->address &= PAGE_MASK;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
763
  	}
4035c07a8   Hugh Dickins   ksm: take keyhole...
764
  out:
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
765
766
  	cond_resched();		/* we're called from many long loops */
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
767
  static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
768
  				       struct rmap_item **rmap_list)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
769
  {
6514d511d   Hugh Dickins   ksm: singly-linke...
770
771
772
  	while (*rmap_list) {
  		struct rmap_item *rmap_item = *rmap_list;
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
773
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
774
775
776
777
778
  		free_rmap_item(rmap_item);
  	}
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
779
   * Though it's very tempting to unmerge rmap_items from stable tree rather
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
780
781
782
783
784
   * than check every pte of a given vma, the locking doesn't quite work for
   * that - an rmap_item is assigned to the stable tree after inserting ksm
   * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
   * rmap_items from parent to child at fork time (so as not to waste time
   * if exit comes before the next scan reaches it).
81464e306   Hugh Dickins   ksm: five little ...
785
786
787
788
789
   *
   * Similarly, although we'd like to remove rmap_items (so updating counts
   * and freeing memory) when unmerging an area, it's easier to leave that
   * to the next pass of ksmd - consider, for example, how ksmd might be
   * in cmp_and_merge_page on one of the rmap_items we would be removing.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
790
   */
d952b7913   Hugh Dickins   ksm: fix endless ...
791
792
  static int unmerge_ksm_pages(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
793
794
  {
  	unsigned long addr;
d952b7913   Hugh Dickins   ksm: fix endless ...
795
  	int err = 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
796

d952b7913   Hugh Dickins   ksm: fix endless ...
797
  	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
798
799
  		if (ksm_test_exit(vma->vm_mm))
  			break;
d952b7913   Hugh Dickins   ksm: fix endless ...
800
801
802
803
804
805
  		if (signal_pending(current))
  			err = -ERESTARTSYS;
  		else
  			err = break_ksm(vma, addr);
  	}
  	return err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
806
  }
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
807
808
809
810
  #ifdef CONFIG_SYSFS
  /*
   * Only called through the sysfs control interface:
   */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
811
812
813
814
815
816
817
818
819
820
821
822
  static int remove_stable_node(struct stable_node *stable_node)
  {
  	struct page *page;
  	int err;
  
  	page = get_ksm_page(stable_node, true);
  	if (!page) {
  		/*
  		 * get_ksm_page did remove_node_from_stable_tree itself.
  		 */
  		return 0;
  	}
8fdb3dbf0   Hugh Dickins   ksm: add some com...
823
824
825
826
827
  	if (WARN_ON_ONCE(page_mapped(page))) {
  		/*
  		 * This should not happen: but if it does, just refuse to let
  		 * merge_across_nodes be switched - there is no need to panic.
  		 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
828
  		err = -EBUSY;
8fdb3dbf0   Hugh Dickins   ksm: add some com...
829
  	} else {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
830
  		/*
8fdb3dbf0   Hugh Dickins   ksm: add some com...
831
832
833
  		 * The stable node did not yet appear stale to get_ksm_page(),
  		 * since that allows for an unmapped ksm page to be recognized
  		 * right up until it is freed; but the node is safe to remove.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
834
835
836
837
838
839
840
841
842
843
844
845
846
  		 * This page might be in a pagevec waiting to be freed,
  		 * or it might be PageSwapCache (perhaps under writeback),
  		 * or it might have been removed from swapcache a moment ago.
  		 */
  		set_page_stable_node(page, NULL);
  		remove_node_from_stable_tree(stable_node);
  		err = 0;
  	}
  
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
  /*
   * remove_stable_node_chain - remove a stable node, or a whole chain with
   * all of its dups, from @root.
   *
   * Returns false (0) when everything was removed, true (1) as soon as
   * remove_stable_node() fails for the node or for one of the dups; in the
   * latter case the chain and its remaining dups are left in place.
   */
  static int remove_stable_node_chain(struct stable_node *stable_node,
  				    struct rb_root *root)
  {
  	struct stable_node *dup;
  	struct hlist_node *hlist_safe;
  
  	/* Plain node: nothing but the node itself to take care of. */
  	if (!is_stable_node_chain(stable_node)) {
  		VM_BUG_ON(is_stable_node_dup(stable_node));
  		return remove_stable_node(stable_node) ? true : false;
  	}
  
  	/* _safe iteration: removing a dup unlinks it from this hlist. */
  	hlist_for_each_entry_safe(dup, hlist_safe,
  				  &stable_node->hlist, hlist_dup) {
  		VM_BUG_ON(!is_stable_node_dup(dup));
  		if (remove_stable_node(dup))
  			return true;
  	}
  
  	/* All dups are gone, so the now-empty chain head can go too. */
  	BUG_ON(!hlist_empty(&stable_node->hlist));
  	free_stable_node_chain(stable_node, root);
  	return false;
  }
cbf86cfe0   Hugh Dickins   ksm: remove old s...
871
872
  static int remove_all_stable_nodes(void)
  {
036404183   Geliang Tang   mm/ksm.c: use lis...
873
  	struct stable_node *stable_node, *next;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
874
875
  	int nid;
  	int err = 0;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
876
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
877
878
879
  		while (root_stable_tree[nid].rb_node) {
  			stable_node = rb_entry(root_stable_tree[nid].rb_node,
  						struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
880
881
  			if (remove_stable_node_chain(stable_node,
  						     root_stable_tree + nid)) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
882
883
884
885
886
887
  				err = -EBUSY;
  				break;	/* proceed to next nid */
  			}
  			cond_resched();
  		}
  	}
036404183   Geliang Tang   mm/ksm.c: use lis...
888
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
889
890
891
892
  		if (remove_stable_node(stable_node))
  			err = -EBUSY;
  		cond_resched();
  	}
cbf86cfe0   Hugh Dickins   ksm: remove old s...
893
894
  	return err;
  }
d952b7913   Hugh Dickins   ksm: fix endless ...
895
  static int unmerge_and_remove_all_rmap_items(void)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
896
897
898
899
  {
  	struct mm_slot *mm_slot;
  	struct mm_struct *mm;
  	struct vm_area_struct *vma;
d952b7913   Hugh Dickins   ksm: fix endless ...
900
901
902
  	int err = 0;
  
  	spin_lock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
903
  	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
d952b7913   Hugh Dickins   ksm: fix endless ...
904
905
  						struct mm_slot, mm_list);
  	spin_unlock(&ksm_mmlist_lock);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
906

9ba692948   Hugh Dickins   ksm: fix oom dead...
907
908
  	for (mm_slot = ksm_scan.mm_slot;
  			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
909
910
911
  		mm = mm_slot->mm;
  		down_read(&mm->mmap_sem);
  		for (vma = mm->mmap; vma; vma = vma->vm_next) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
912
913
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
914
915
  			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  				continue;
d952b7913   Hugh Dickins   ksm: fix endless ...
916
917
  			err = unmerge_ksm_pages(vma,
  						vma->vm_start, vma->vm_end);
9ba692948   Hugh Dickins   ksm: fix oom dead...
918
919
  			if (err)
  				goto error;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
920
  		}
9ba692948   Hugh Dickins   ksm: fix oom dead...
921

6514d511d   Hugh Dickins   ksm: singly-linke...
922
  		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
7496fea9a   Zhou Chengming   ksm: fix conflict...
923
  		up_read(&mm->mmap_sem);
d952b7913   Hugh Dickins   ksm: fix endless ...
924
925
  
  		spin_lock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
926
  		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
d952b7913   Hugh Dickins   ksm: fix endless ...
927
  						struct mm_slot, mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
928
  		if (ksm_test_exit(mm)) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
929
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
930
931
932
933
934
  			list_del(&mm_slot->mm_list);
  			spin_unlock(&ksm_mmlist_lock);
  
  			free_mm_slot(mm_slot);
  			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
935
  			mmdrop(mm);
7496fea9a   Zhou Chengming   ksm: fix conflict...
936
  		} else
9ba692948   Hugh Dickins   ksm: fix oom dead...
937
  			spin_unlock(&ksm_mmlist_lock);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
938
  	}
cbf86cfe0   Hugh Dickins   ksm: remove old s...
939
940
  	/* Clean up stable nodes, but don't worry if some are still busy */
  	remove_all_stable_nodes();
d952b7913   Hugh Dickins   ksm: fix endless ...
941
  	ksm_scan.seqnr = 0;
9ba692948   Hugh Dickins   ksm: fix oom dead...
942
943
944
945
  	return 0;
  
  error:
  	up_read(&mm->mmap_sem);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
946
  	spin_lock(&ksm_mmlist_lock);
d952b7913   Hugh Dickins   ksm: fix endless ...
947
  	ksm_scan.mm_slot = &ksm_mm_head;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
948
  	spin_unlock(&ksm_mmlist_lock);
d952b7913   Hugh Dickins   ksm: fix endless ...
949
  	return err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
950
  }
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
951
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
952

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
953
954
955
  static u32 calc_checksum(struct page *page)
  {
  	u32 checksum;
9b04c5fec   Cong Wang   mm: remove the se...
956
  	void *addr = kmap_atomic(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
957
  	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
9b04c5fec   Cong Wang   mm: remove the se...
958
  	kunmap_atomic(addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
959
960
961
962
963
964
965
  	return checksum;
  }
  
  static int memcmp_pages(struct page *page1, struct page *page2)
  {
  	char *addr1, *addr2;
  	int ret;
9b04c5fec   Cong Wang   mm: remove the se...
966
967
  	addr1 = kmap_atomic(page1);
  	addr2 = kmap_atomic(page2);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
968
  	ret = memcmp(addr1, addr2, PAGE_SIZE);
9b04c5fec   Cong Wang   mm: remove the se...
969
970
  	kunmap_atomic(addr2);
  	kunmap_atomic(addr1);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
971
972
973
974
975
976
977
978
979
980
981
982
  	return ret;
  }
  
  /* pages_identical - nonzero iff both pages hold byte-identical content. */
  static inline int pages_identical(struct page *page1, struct page *page2)
  {
  	return memcmp_pages(page1, page2) == 0;
  }
  
  /*
   * write_protect_page - make sure the pte mapping @page in @vma is clean
   * and not writable, and report the resulting pte through @orig_pte.
   *
   * Returns 0 on success.  Returns -EFAULT, leaving @orig_pte untouched, when
   * the page is not mapped in @vma or when its page count shows references
   * beyond its mappings (plus one, plus swap cache).
   */
  static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  			      pte_t *orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  	};
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */
  	unsigned long addr;
  	pte_t *ptep;
  	int err = -EFAULT;
  
  	pvmw.address = page_address_in_vma(page, vma);
  	if (pvmw.address == -EFAULT)
  		return err;
  
  	BUG_ON(PageTransCompound(page));
  
  	mmun_start = pvmw.address;
  	mmun_end   = pvmw.address + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  
  	if (!page_vma_mapped_walk(&pvmw))
  		goto out_mn;
  	if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
  		goto out_unlock;
  
  	/* Shorthands for what the walk has found. */
  	addr = pvmw.address;
  	ptep = pvmw.pte;
  
  	if (pte_write(*ptep) || pte_dirty(*ptep) ||
  	    (pte_protnone(*ptep) && pte_savedwrite(*ptep)) ||
  	    mm_tlb_flush_pending(mm)) {
  		int swapped = PageSwapCache(page);
  		pte_t entry;
  
  		flush_cache_page(vma, addr, page_to_pfn(page));
  		/*
  		 * Ok this is tricky: get_user_pages_fast() does not take
  		 * any lock when it runs, therefore the check that we are
  		 * going to make with the pagecount against the mapcount is
  		 * racy and O_DIRECT can happen right after the check.
  		 * So we clear the pte and flush the tlb before the check;
  		 * this assures us that no O_DIRECT can happen after the
  		 * check or in the middle of the check.
  		 */
  		entry = ptep_clear_flush_notify(vma, addr, ptep);
  		/*
  		 * Check that no O_DIRECT or similar I/O is in progress on the
  		 * page
  		 */
  		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
  			/* Extra references: put the pte back and give up. */
  			set_pte_at(mm, addr, ptep, entry);
  			goto out_unlock;
  		}
  		/* Carry the dirty bit over to the page before it is cleaned. */
  		if (pte_dirty(entry))
  			set_page_dirty(page);
  
  		if (pte_protnone(entry))
  			entry = pte_mkclean(pte_clear_savedwrite(entry));
  		else
  			entry = pte_mkclean(pte_wrprotect(entry));
  		set_pte_at_notify(mm, addr, ptep, entry);
  	}
  	*orig_pte = *ptep;
  	err = 0;
  
  out_unlock:
  	page_vma_mapped_walk_done(&pvmw);
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  	return err;
  }
  
  /**
   * replace_page - replace page in vma by new ksm page
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1052
1053
1054
   * @vma:      vma that holds the pte pointing to page
   * @page:     the page we are replacing by kpage
   * @kpage:    the ksm page we replace page by
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1055
1056
1057
1058
   * @orig_pte: the original value of the pte
   *
   * Returns 0 on success, -EFAULT on failure.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1059
1060
  static int replace_page(struct vm_area_struct *vma, struct page *page,
  			struct page *kpage, pte_t orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */
  	unsigned long addr;
  	spinlock_t *ptl;
  	pmd_t *pmd;
  	pte_t *ptep;
  	pte_t newpte;
  	int err = -EFAULT;
  
  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		return err;
  
  	pmd = mm_find_pmd(mm, addr);
  	if (!pmd)
  		return err;
  
  	mmun_start = addr;
  	mmun_end   = addr + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  
  	/* Give up if the pte changed since the caller sampled orig_pte. */
  	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
  	if (!pte_same(*ptep, orig_pte)) {
  		pte_unmap_unlock(ptep, ptl);
  		goto out_mn;
  	}
  
  	/*
  	 * No need to check ksm_use_zero_pages here: we can only have a
  	 * zero_page here if ksm_use_zero_pages was enabled already.
  	 */
  	if (is_zero_pfn(page_to_pfn(kpage))) {
  		newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
  					       vma->vm_page_prot));
  		/*
  		 * We're replacing an anonymous page with a zero page, which is
  		 * not anonymous. We need to do proper accounting otherwise we
  		 * will get wrong values in /proc, and a BUG message in dmesg
  		 * when tearing down the mm.
  		 */
  		dec_mm_counter(mm, MM_ANONPAGES);
  	} else {
  		/* A real ksm page: take a reference and an anon rmap for it. */
  		get_page(kpage);
  		page_add_anon_rmap(kpage, vma, addr, false);
  		newpte = mk_pte(kpage, vma->vm_page_prot);
  	}
  
  	flush_cache_page(vma, addr, pte_pfn(*ptep));
  	ptep_clear_flush_notify(vma, addr, ptep);
  	set_pte_at_notify(mm, addr, ptep, newpte);
  
  	/* The old page loses this mapping and the reference that went with it. */
  	page_remove_rmap(page, false);
  	if (!page_mapped(page))
  		try_to_free_swap(page);
  	put_page(page);
  
  	pte_unmap_unlock(ptep, ptl);
  	err = 0;
  
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
  	return err;
  }
  
  /*
   * try_to_merge_one_page - take two pages and merge them into one
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1126
1127
   * @vma: the vma that holds the pte pointing to page
   * @page: the PageAnon page that we want to replace with kpage
80e148226   Hugh Dickins   ksm: share anon p...
1128
1129
   * @kpage: the PageKsm page that we want to map instead of page,
   *         or NULL the first time when we want to use page as kpage.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1130
1131
1132
1133
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_one_page(struct vm_area_struct *vma,
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1134
  				 struct page *page, struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1135
1136
1137
  {
  	pte_t orig_pte = __pte(0);
  	int err = -EFAULT;
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1138
1139
  	if (page == kpage)			/* ksm page forked */
  		return 0;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1140
  	if (!PageAnon(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1141
  		goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1142
1143
1144
1145
1146
1147
1148
  	/*
  	 * We need the page lock to read a stable PageSwapCache in
  	 * write_protect_page().  We use trylock_page() instead of
  	 * lock_page() because we don't want to wait here - we
  	 * prefer to continue scanning and merging different pages,
  	 * then come back to this page when it is unlocked.
  	 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1149
  	if (!trylock_page(page))
31e855ea7   Hugh Dickins   ksm: remove redun...
1150
  		goto out;
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1151
1152
  
  	if (PageTransCompound(page)) {
a7306c343   Andrea Arcangeli   ksm: prevent cras...
1153
  		if (split_huge_page(page))
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1154
1155
  			goto out_unlock;
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1156
1157
1158
1159
1160
1161
  	/*
  	 * If this anonymous page is mapped only here, its pte may need
  	 * to be write-protected.  If it's mapped elsewhere, all of its
  	 * ptes are necessarily already write-protected.  But in either
  	 * case, we need to lock and check page_count is not raised.
  	 */
80e148226   Hugh Dickins   ksm: share anon p...
1162
1163
1164
1165
1166
1167
1168
1169
1170
  	if (write_protect_page(vma, page, &orig_pte) == 0) {
  		if (!kpage) {
  			/*
  			 * While we hold page lock, upgrade page from
  			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
  			 * stable_tree_insert() will update stable_node.
  			 */
  			set_page_stable_node(page, NULL);
  			mark_page_accessed(page);
337ed7eb5   Minchan Kim   mm/ksm.c: mark st...
1171
1172
1173
1174
1175
1176
  			/*
  			 * Page reclaim just frees a clean page with no dirty
  			 * ptes: make sure that the ksm page would be swapped.
  			 */
  			if (!PageDirty(page))
  				SetPageDirty(page);
80e148226   Hugh Dickins   ksm: share anon p...
1177
1178
1179
1180
  			err = 0;
  		} else if (pages_identical(page, kpage))
  			err = replace_page(vma, page, kpage, orig_pte);
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1181

80e148226   Hugh Dickins   ksm: share anon p...
1182
  	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
73848b468   Hugh Dickins   ksm: fix mlockfre...
1183
  		munlock_vma_page(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1184
1185
  		if (!PageMlocked(kpage)) {
  			unlock_page(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1186
1187
1188
1189
1190
  			lock_page(kpage);
  			mlock_vma_page(kpage);
  			page = kpage;		/* for final unlock */
  		}
  	}
73848b468   Hugh Dickins   ksm: fix mlockfre...
1191

f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1192
  out_unlock:
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1193
  	unlock_page(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1194
1195
1196
1197
1198
  out:
  	return err;
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1199
1200
   * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
   * but no new kernel page is allocated: kpage must already be a ksm page.
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1201
1202
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
81464e306   Hugh Dickins   ksm: five little ...
1203
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1204
1205
  static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
  				      struct page *page, struct page *kpage)
81464e306   Hugh Dickins   ksm: five little ...
1206
  {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1207
  	struct mm_struct *mm = rmap_item->mm;
81464e306   Hugh Dickins   ksm: five little ...
1208
1209
  	struct vm_area_struct *vma;
  	int err = -EFAULT;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1210
  	down_read(&mm->mmap_sem);
85c6e8dd2   Andrea Arcangeli   ksm: use find_mer...
1211
1212
  	vma = find_mergeable_vma(mm, rmap_item->address);
  	if (!vma)
81464e306   Hugh Dickins   ksm: five little ...
1213
  		goto out;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1214
  	err = try_to_merge_one_page(vma, page, kpage);
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1215
1216
  	if (err)
  		goto out;
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
1217
1218
  	/* Unstable nid is in union with stable anon_vma: remove first */
  	remove_rmap_item_from_tree(rmap_item);
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1219
  	/* Must get reference to anon_vma while still holding mmap_sem */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
1220
1221
  	rmap_item->anon_vma = vma->anon_vma;
  	get_anon_vma(vma->anon_vma);
81464e306   Hugh Dickins   ksm: five little ...
1222
  out:
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1223
  	up_read(&mm->mmap_sem);
81464e306   Hugh Dickins   ksm: five little ...
1224
1225
1226
1227
  	return err;
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1228
1229
1230
   * try_to_merge_two_pages - take two identical pages and prepare them
   * to be merged into one page.
   *
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1231
1232
   * This function returns the kpage if we successfully merged two identical
   * pages into one ksm page, NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1233
   *
80e148226   Hugh Dickins   ksm: share anon p...
1234
   * Note that this function upgrades page to ksm page: if one of the pages
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1235
1236
   * is already a ksm page, try_to_merge_with_ksm_page should be used.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1237
1238
1239
1240
  static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  					   struct page *page,
  					   struct rmap_item *tree_rmap_item,
  					   struct page *tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1241
  {
80e148226   Hugh Dickins   ksm: share anon p...
1242
  	int err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1243

80e148226   Hugh Dickins   ksm: share anon p...
1244
  	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1245
  	if (!err) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1246
  		err = try_to_merge_with_ksm_page(tree_rmap_item,
80e148226   Hugh Dickins   ksm: share anon p...
1247
  							tree_page, page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1248
  		/*
81464e306   Hugh Dickins   ksm: five little ...
1249
1250
  		 * If that fails, we have a ksm page with only one pte
  		 * pointing to it: so break it.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1251
  		 */
4035c07a8   Hugh Dickins   ksm: take keyhole...
1252
  		if (err)
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1253
  			break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1254
  	}
80e148226   Hugh Dickins   ksm: share anon p...
1255
  	return err ? NULL : page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1256
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
  /*
   * Tell whether stable_node can take further sharers: it must still have
   * at least one mapping, and its rmap_hlist_len plus offset must stay
   * below ksm_max_page_sharing.
   */
  static __always_inline
  bool __is_page_sharing_candidate(struct stable_node *stable_node, int offset)
  {
  	VM_BUG_ON(stable_node->rmap_hlist_len < 0);
  
  	/*
  	 * Check that at least one mapping still exists, otherwise
  	 * there's not much point to merge and share with this
  	 * stable_node, as the underlying tree_page of the other
  	 * sharer is going to be freed soon.
  	 */
  	if (!stable_node->rmap_hlist_len)
  		return false;
  
  	return stable_node->rmap_hlist_len + offset < ksm_max_page_sharing;
  }
  
  /*
   * Sharing-candidate check for the merge currently being considered,
   * i.e. __is_page_sharing_candidate() with no extra offset.
   */
  static __always_inline
  bool is_page_sharing_candidate(struct stable_node *stable_node)
  {
  	const int no_offset = 0;
  
  	return __is_page_sharing_candidate(stable_node, no_offset);
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1276
1277
1278
1279
  /*
   * stable_node_dup - select the dup of a stable_node chain to merge into.
   *
   * @_stable_node_dup: output, set to the selected dup or to NULL if no dup
   *                    of the chain is a sharing candidate.
   * @_stable_node: in/out, the chain to walk; overwritten with the selected
   *                dup if the chain gets collapsed.
   * @root: stable rbtree the chain is linked into.
   * @prune_stale_stable_nodes: request a full walk of the chain; honoured
   *                only once ksm_stable_node_chains_prune_millisecs have
   *                elapsed since the chain's chain_prune_time.
   *
   * When pruning, every dup is visited and the sharing candidate with the
   * highest rmap_hlist_len wins; otherwise the walk stops at the first
   * candidate.  If a pruning walk found a single dup, the chain is replaced
   * in the rbtree by that dup and freed.  Otherwise a selected dup that can
   * accept one more merge is moved to the head of the chain.
   *
   * Returns the page of the selected dup as obtained from get_ksm_page()
   * (put_page() is skipped for it), or NULL if no dup was selected.
   */
  static struct page *stable_node_dup(struct stable_node **_stable_node_dup,
  				    struct stable_node **_stable_node,
  				    struct rb_root *root,
  				    bool prune_stale_stable_nodes)
  {
  	struct stable_node *dup, *found = NULL, *stable_node = *_stable_node;
  	struct hlist_node *hlist_safe;
  	struct page *_tree_page, *tree_page = NULL;
  	int nr = 0;
  	/* Only meaningful once found is set; initialized for robustness. */
  	int found_rmap_hlist_len = 0;
  
  	if (!prune_stale_stable_nodes ||
  	    time_before(jiffies, stable_node->chain_prune_time +
  			msecs_to_jiffies(
  				ksm_stable_node_chains_prune_millisecs)))
  		prune_stale_stable_nodes = false;
  	else
  		stable_node->chain_prune_time = jiffies;
  
  	hlist_for_each_entry_safe(dup, hlist_safe,
  				  &stable_node->hlist, hlist_dup) {
  		cond_resched();
  		/*
  		 * We must walk all stable_node_dup to prune the stale
  		 * stable nodes during lookup.
  		 *
  		 * get_ksm_page can drop the nodes from the
  		 * stable_node->hlist if they point to freed pages
  		 * (that's why we do a _safe walk). The "dup"
  		 * stable_node parameter itself will be freed from
  		 * under us if it returns NULL.
  		 */
  		_tree_page = get_ksm_page(dup, false);
  		if (!_tree_page)
  			continue;
  		nr += 1;
  		if (is_page_sharing_candidate(dup)) {
  			if (!found ||
  			    dup->rmap_hlist_len > found_rmap_hlist_len) {
  				/* drop the reference of the previous pick */
  				if (found)
  					put_page(tree_page);
  				found = dup;
  				found_rmap_hlist_len = found->rmap_hlist_len;
  				tree_page = _tree_page;
  
  				/* skip put_page for found dup */
  				if (!prune_stale_stable_nodes)
  					break;
  				continue;
  			}
  		}
  		put_page(_tree_page);
  	}
  
  	if (found) {
  		/*
  		 * nr is counting all dups in the chain only if
  		 * prune_stale_stable_nodes is true, otherwise we may
  		 * break the loop at nr == 1 even if there are
  		 * multiple entries.
  		 */
  		if (prune_stale_stable_nodes && nr == 1) {
  			/*
  			 * If there's not just one entry it would
  			 * corrupt memory, better BUG_ON. In KSM
  			 * context with no lock held it's not even
  			 * fatal.
  			 */
  			BUG_ON(stable_node->hlist.first->next);
  
  			/*
  			 * There's just one entry and it is below the
  			 * deduplication limit so drop the chain.
  			 */
  			rb_replace_node(&stable_node->node, &found->node,
  					root);
  			free_stable_node(stable_node);
  			ksm_stable_node_chains--;
  			ksm_stable_node_dups--;
  			/*
  			 * NOTE: the caller depends on the stable_node
  			 * to be equal to stable_node_dup if the chain
  			 * was collapsed.
  			 */
  			*_stable_node = found;
  			/*
  			 * Just for robustness as stable_node is
  			 * otherwise left as a stale pointer, the
  			 * compiler shall optimize it away at build
  			 * time.
  			 */
  			stable_node = NULL;
  		} else if (stable_node->hlist.first != &found->hlist_dup &&
  			   __is_page_sharing_candidate(found, 1)) {
  			/*
  			 * If the found stable_node dup can accept one
  			 * more future merge (in addition to the one
  			 * that is underway) and is not at the head of
  			 * the chain, put it there so next search will
  			 * be quicker in the !prune_stale_stable_nodes
  			 * case.
  			 *
  			 * NOTE: it would be inaccurate to use nr > 1
  			 * instead of checking the hlist.first pointer
  			 * directly, because in the
  			 * prune_stale_stable_nodes case "nr" isn't
  			 * the position of the found dup in the chain,
  			 * but the total number of dups in the chain.
  			 */
  			hlist_del(&found->hlist_dup);
  			hlist_add_head(&found->hlist_dup,
  				       &stable_node->hlist);
  		}
  	}
  
  	*_stable_node_dup = found;
  	return tree_page;
  }
  
  /*
   * Return some stable_node usable to continue a tree walk: stable_node
   * itself when it is not a chain, otherwise the first dup of the chain.
   * An empty chain is freed with free_stable_node_chain() and NULL is
   * returned.
   */
  static struct stable_node *stable_node_dup_any(struct stable_node *stable_node,
  					       struct rb_root *root)
  {
  	if (is_stable_node_chain(stable_node)) {
  		if (!hlist_empty(&stable_node->hlist))
  			return hlist_entry(stable_node->hlist.first,
  					   typeof(*stable_node), hlist_dup);
  
  		/* nothing left on the chain: get rid of it */
  		free_stable_node_chain(stable_node, root);
  		return NULL;
  	}
  
  	return stable_node;
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
  /*
   * Like for get_ksm_page, this function can free the *_stable_node and
   * *_stable_node_dup if the returned tree_page is NULL.
   *
   * It can also free and overwrite *_stable_node with the found
   * stable_node_dup if the chain is collapsed (in which case
   * *_stable_node will be equal to *_stable_node_dup like if the chain
   * never existed). It's up to the caller to verify tree_page is not
   * NULL before dereferencing *_stable_node or *_stable_node_dup.
   *
   * *_stable_node_dup is really a second output parameter of this
   * function and will be overwritten in all cases, the caller doesn't
   * need to initialize it.
   */
  static struct page *__stable_node_chain(struct stable_node **_stable_node_dup,
  					struct stable_node **_stable_node,
  					struct rb_root *root,
  					bool prune_stale_stable_nodes)
  {
  	struct stable_node *stable_node = *_stable_node;
  
  	/* A chain: let stable_node_dup() choose among its dups. */
  	if (is_stable_node_chain(stable_node))
  		return stable_node_dup(_stable_node_dup, _stable_node, root,
  				       prune_stale_stable_nodes);
  
  	if (!is_page_sharing_candidate(stable_node)) {
  		/*
  		 * _stable_node_dup set to NULL means the stable_node
  		 * reached the ksm_max_page_sharing limit.
  		 */
  		*_stable_node_dup = NULL;
  		return NULL;
  	}
  
  	/* A regular stable_node that can still be shared. */
  	*_stable_node_dup = stable_node;
  	return get_ksm_page(stable_node, false);
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1440
1441
1442
  static __always_inline struct page *chain_prune(struct stable_node **s_n_d,
  						struct stable_node **s_n,
  						struct rb_root *root)
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1443
  {
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1444
  	return __stable_node_chain(s_n_d, s_n, root, true);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1445
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1446
1447
1448
  static __always_inline struct page *chain(struct stable_node **s_n_d,
  					  struct stable_node *s_n,
  					  struct rb_root *root)
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1449
  {
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1450
1451
1452
1453
1454
1455
1456
  	struct stable_node *old_stable_node = s_n;
  	struct page *tree_page;
  
  	tree_page = __stable_node_chain(s_n_d, &s_n, root, false);
  	/* not pruning dups so s_n cannot have changed */
  	VM_BUG_ON(s_n != old_stable_node);
  	return tree_page;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1457
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1458
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1459
   * stable_tree_search - search for page inside the stable tree
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1460
1461
1462
1463
   *
   * This function checks if there is a page inside the stable tree
   * with identical content to the page that we are scanning right now.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1464
   * This function returns the stable tree node of identical content if found,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1465
1466
   * NULL otherwise.
   */
62b61f611   Hugh Dickins   ksm: memory hotre...
1467
  static struct page *stable_tree_search(struct page *page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1468
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1469
  	int nid;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1470
  	struct rb_root *root;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1471
1472
  	struct rb_node **new;
  	struct rb_node *parent;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1473
  	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1474
  	struct stable_node *page_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1475

4146d2d67   Hugh Dickins   ksm: make !merge_...
1476
1477
1478
  	page_node = page_stable_node(page);
  	if (page_node && page_node->head != &migrate_nodes) {
  		/* ksm page forked */
08beca44d   Hugh Dickins   ksm: stable_node ...
1479
  		get_page(page);
62b61f611   Hugh Dickins   ksm: memory hotre...
1480
  		return page;
08beca44d   Hugh Dickins   ksm: stable_node ...
1481
  	}
90bd6fd31   Petr Holasek   ksm: allow trees ...
1482
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1483
  	root = root_stable_tree + nid;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1484
  again:
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1485
  	new = &root->rb_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1486
  	parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1487

4146d2d67   Hugh Dickins   ksm: make !merge_...
1488
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1489
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1490
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1491
  		cond_resched();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1492
  		stable_node = rb_entry(*new, struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1493
  		stable_node_any = NULL;
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1494
  		tree_page = chain_prune(&stable_node_dup, &stable_node,	root);
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1495
1496
1497
1498
1499
1500
  		/*
  		 * NOTE: stable_node may have been freed by
  		 * chain_prune() if the returned stable_node_dup is
  		 * not NULL. stable_node_dup may have been inserted in
  		 * the rbtree instead as a regular stable_node (in
  		 * order to collapse the stable_node chain if a single
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1501
1502
1503
1504
1505
  		 * stable_node dup was found in it). In such case the
  		 * stable_node is overwritten by the calleee to point
  		 * to the stable_node_dup that was collapsed in the
  		 * stable rbtree and stable_node will be equal to
  		 * stable_node_dup like if the chain never existed.
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1506
  		 */
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
  		if (!stable_node_dup) {
  			/*
  			 * Either all stable_node dups were full in
  			 * this stable_node chain, or this chain was
  			 * empty and should be rb_erased.
  			 */
  			stable_node_any = stable_node_dup_any(stable_node,
  							      root);
  			if (!stable_node_any) {
  				/* rb_erase just run */
  				goto again;
  			}
  			/*
  			 * Take any of the stable_node dups page of
  			 * this stable_node chain to let the tree walk
  			 * continue. All KSM pages belonging to the
  			 * stable_node dups in a stable_node chain
  			 * have the same content and they're
  			 * wrprotected at all times. Any will work
  			 * fine to continue the walk.
  			 */
  			tree_page = get_ksm_page(stable_node_any, false);
  		}
  		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
  		if (!tree_page) {
  			/*
  			 * If we walked over a stale stable_node,
  			 * get_ksm_page() will call rb_erase() and it
  			 * may rebalance the tree from under us. So
  			 * restart the search from scratch. Returning
  			 * NULL would be safe too, but we'd generate
  			 * false negative insertions just because some
  			 * stable_node was stale.
  			 */
  			goto again;
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1543

4035c07a8   Hugh Dickins   ksm: take keyhole...
1544
  		ret = memcmp_pages(page, tree_page);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1545
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1546

4146d2d67   Hugh Dickins   ksm: make !merge_...
1547
  		parent = *new;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1548
  		if (ret < 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1549
  			new = &parent->rb_left;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1550
  		else if (ret > 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1551
  			new = &parent->rb_right;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1552
  		else {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
  			if (page_node) {
  				VM_BUG_ON(page_node->head != &migrate_nodes);
  				/*
  				 * Test if the migrated page should be merged
  				 * into a stable node dup. If the mapcount is
  				 * 1 we can migrate it with another KSM page
  				 * without adding it to the chain.
  				 */
  				if (page_mapcount(page) > 1)
  					goto chain_append;
  			}
  
  			if (!stable_node_dup) {
  				/*
  				 * If the stable_node is a chain and
  				 * we got a payload match in memcmp
  				 * but we cannot merge the scanned
  				 * page in any of the existing
  				 * stable_node dups because they're
  				 * all full, we need to wait the
  				 * scanned page to find itself a match
  				 * in the unstable tree to create a
  				 * brand new KSM page to add later to
  				 * the dups of this stable_node.
  				 */
  				return NULL;
  			}
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1580
1581
1582
1583
1584
1585
1586
  			/*
  			 * Lock and unlock the stable_node's page (which
  			 * might already have been migrated) so that page
  			 * migration is sure to notice its raised count.
  			 * It would be more elegant to return stable_node
  			 * than kpage, but that involves more changes.
  			 */
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1587
1588
1589
1590
1591
1592
  			tree_page = get_ksm_page(stable_node_dup, true);
  			if (unlikely(!tree_page))
  				/*
  				 * The tree may have been rebalanced,
  				 * so re-evaluate parent and new.
  				 */
4146d2d67   Hugh Dickins   ksm: make !merge_...
1593
  				goto again;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1594
1595
1596
1597
1598
1599
1600
1601
  			unlock_page(tree_page);
  
  			if (get_kpfn_nid(stable_node_dup->kpfn) !=
  			    NUMA(stable_node_dup->nid)) {
  				put_page(tree_page);
  				goto replace;
  			}
  			return tree_page;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1602
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1603
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1604
1605
1606
1607
1608
1609
  	if (!page_node)
  		return NULL;
  
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	rb_link_node(&page_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1610
  	rb_insert_color(&page_node->node, root);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1611
1612
1613
1614
1615
1616
  out:
  	if (is_page_sharing_candidate(page_node)) {
  		get_page(page);
  		return page;
  	} else
  		return NULL;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1617
1618
  
  replace:
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1619
1620
  	/*
  	 * If stable_node was a chain and chain_prune collapsed it,
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1621
1622
1623
1624
1625
  	 * stable_node has been updated to be the new regular
  	 * stable_node. A collapse of the chain is indistinguishable
  	 * from the case there was no chain in the stable
  	 * rbtree. Otherwise stable_node is the chain and
  	 * stable_node_dup is the dup to replace.
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1626
  	 */
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1627
  	if (stable_node_dup == stable_node) {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1628
1629
  		VM_BUG_ON(is_stable_node_chain(stable_node_dup));
  		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1630
1631
1632
1633
1634
  		/* there is no chain */
  		if (page_node) {
  			VM_BUG_ON(page_node->head != &migrate_nodes);
  			list_del(&page_node->list);
  			DO_NUMA(page_node->nid = nid);
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1635
1636
  			rb_replace_node(&stable_node_dup->node,
  					&page_node->node,
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1637
1638
1639
1640
1641
1642
  					root);
  			if (is_page_sharing_candidate(page_node))
  				get_page(page);
  			else
  				page = NULL;
  		} else {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1643
  			rb_erase(&stable_node_dup->node, root);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1644
1645
  			page = NULL;
  		}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1646
  	} else {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
  		VM_BUG_ON(!is_stable_node_chain(stable_node));
  		__stable_node_dup_del(stable_node_dup);
  		if (page_node) {
  			VM_BUG_ON(page_node->head != &migrate_nodes);
  			list_del(&page_node->list);
  			DO_NUMA(page_node->nid = nid);
  			stable_node_chain_add_dup(page_node, stable_node);
  			if (is_page_sharing_candidate(page_node))
  				get_page(page);
  			else
  				page = NULL;
  		} else {
  			page = NULL;
  		}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1661
  	}
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1662
1663
  	stable_node_dup->head = &migrate_nodes;
  	list_add(&stable_node_dup->list, stable_node_dup->head);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1664
  	return page;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1665
1666
1667
1668
1669
  
  chain_append:
  	/* stable_node_dup could be null if it reached the limit */
  	if (!stable_node_dup)
  		stable_node_dup = stable_node_any;
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1670
1671
  	/*
  	 * If stable_node was a chain and chain_prune collapsed it,
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1672
1673
1674
1675
1676
  	 * stable_node has been updated to be the new regular
  	 * stable_node. A collapse of the chain is indistinguishable
  	 * from the case there was no chain in the stable
  	 * rbtree. Otherwise stable_node is the chain and
  	 * stable_node_dup is the dup to replace.
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1677
  	 */
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1678
  	if (stable_node_dup == stable_node) {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1679
1680
  		VM_BUG_ON(is_stable_node_chain(stable_node_dup));
  		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
  		/* chain is missing so create it */
  		stable_node = alloc_stable_node_chain(stable_node_dup,
  						      root);
  		if (!stable_node)
  			return NULL;
  	}
  	/*
  	 * Add this stable_node dup that was
  	 * migrated to the stable_node chain
  	 * of the current nid for this page
  	 * content.
  	 */
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1693
1694
  	VM_BUG_ON(!is_stable_node_chain(stable_node));
  	VM_BUG_ON(!is_stable_node_dup(stable_node_dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1695
1696
1697
1698
1699
  	VM_BUG_ON(page_node->head != &migrate_nodes);
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	stable_node_chain_add_dup(page_node, stable_node);
  	goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1700
1701
1702
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1703
   * stable_tree_insert - insert stable tree node pointing to new ksm page
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1704
1705
   * into the stable tree.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1706
1707
   * This function returns the stable tree node just allocated on success,
   * NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1708
   */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1709
  static struct stable_node *stable_tree_insert(struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1710
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1711
1712
  	int nid;
  	unsigned long kpfn;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1713
  	struct rb_root *root;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1714
  	struct rb_node **new;
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1715
  	struct rb_node *parent;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1716
1717
  	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
  	bool need_chain = false;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1718

90bd6fd31   Petr Holasek   ksm: allow trees ...
1719
1720
  	kpfn = page_to_pfn(kpage);
  	nid = get_kpfn_nid(kpfn);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1721
  	root = root_stable_tree + nid;
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1722
1723
  again:
  	parent = NULL;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1724
  	new = &root->rb_node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1725

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1726
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1727
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1728
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1729
  		cond_resched();
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1730
  		stable_node = rb_entry(*new, struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1731
  		stable_node_any = NULL;
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1732
  		tree_page = chain(&stable_node_dup, stable_node, root);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
  		if (!stable_node_dup) {
  			/*
  			 * Either all stable_node dups were full in
  			 * this stable_node chain, or this chain was
  			 * empty and should be rb_erased.
  			 */
  			stable_node_any = stable_node_dup_any(stable_node,
  							      root);
  			if (!stable_node_any) {
  				/* rb_erase just run */
  				goto again;
  			}
  			/*
  			 * Take any of the stable_node dups page of
  			 * this stable_node chain to let the tree walk
  			 * continue. All KSM pages belonging to the
  			 * stable_node dups in a stable_node chain
  			 * have the same content and they're
  			 * wrprotected at all times. Any will work
  			 * fine to continue the walk.
  			 */
  			tree_page = get_ksm_page(stable_node_any, false);
  		}
  		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
  		if (!tree_page) {
  			/*
  			 * If we walked over a stale stable_node,
  			 * get_ksm_page() will call rb_erase() and it
  			 * may rebalance the tree from under us. So
  			 * restart the search from scratch. Returning
  			 * NULL would be safe too, but we'd generate
  			 * false negative insertions just because some
  			 * stable_node was stale.
  			 */
  			goto again;
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1769

4035c07a8   Hugh Dickins   ksm: take keyhole...
1770
1771
  		ret = memcmp_pages(kpage, tree_page);
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1772
1773
1774
1775
1776
1777
1778
  
  		parent = *new;
  		if (ret < 0)
  			new = &parent->rb_left;
  		else if (ret > 0)
  			new = &parent->rb_right;
  		else {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1779
1780
  			need_chain = true;
  			break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1781
1782
  		}
  	}
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1783
1784
  	stable_node_dup = alloc_stable_node();
  	if (!stable_node_dup)
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1785
  		return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1786

2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
  	INIT_HLIST_HEAD(&stable_node_dup->hlist);
  	stable_node_dup->kpfn = kpfn;
  	set_page_stable_node(kpage, stable_node_dup);
  	stable_node_dup->rmap_hlist_len = 0;
  	DO_NUMA(stable_node_dup->nid = nid);
  	if (!need_chain) {
  		rb_link_node(&stable_node_dup->node, parent, new);
  		rb_insert_color(&stable_node_dup->node, root);
  	} else {
  		if (!is_stable_node_chain(stable_node)) {
  			struct stable_node *orig = stable_node;
  			/* chain is missing so create it */
  			stable_node = alloc_stable_node_chain(orig, root);
  			if (!stable_node) {
  				free_stable_node(stable_node_dup);
  				return NULL;
  			}
  		}
  		stable_node_chain_add_dup(stable_node_dup, stable_node);
  	}
08beca44d   Hugh Dickins   ksm: stable_node ...
1807

2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1808
  	return stable_node_dup;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1809
1810
1811
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1812
1813
   * unstable_tree_search_insert - search for identical page,
   * else insert rmap_item into the unstable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
   *
   * This function searches for a page in the unstable tree identical to the
   * page currently being scanned; and if no identical page is found in the
   * tree, we insert rmap_item as a new object into the unstable tree.
   *
   * This function returns pointer to rmap_item found to be identical
   * to the currently scanned page, NULL otherwise.
   *
   * This function does both searching and inserting, because they share
   * the same walking algorithm in an rbtree.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1825
1826
1827
1828
  static
  struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
  					      struct page *page,
  					      struct page **tree_pagep)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1829
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1830
1831
  	struct rb_node **new;
  	struct rb_root *root;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1832
  	struct rb_node *parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1833
1834
1835
  	int nid;
  
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1836
  	root = root_unstable_tree + nid;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1837
  	new = &root->rb_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1838
1839
1840
  
  	while (*new) {
  		struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1841
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1842
  		int ret;
d178f27fc   Hugh Dickins   ksm: cond_resched...
1843
  		cond_resched();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1844
  		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1845
  		tree_page = get_mergeable_page(tree_rmap_item);
c8f95ed1a   Andrea Arcangeli   ksm: unstable_tre...
1846
  		if (!tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1847
1848
1849
  			return NULL;
  
  		/*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1850
  		 * Don't substitute a ksm page for a forked page.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1851
  		 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1852
1853
  		if (page == tree_page) {
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1854
1855
  			return NULL;
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1856
  		ret = memcmp_pages(page, tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1857
1858
1859
  
  		parent = *new;
  		if (ret < 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1860
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1861
1862
  			new = &parent->rb_left;
  		} else if (ret > 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1863
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1864
  			new = &parent->rb_right;
b599cbdf1   Hugh Dickins   ksm: treat unstab...
1865
1866
1867
1868
1869
1870
1871
1872
1873
  		} else if (!ksm_merge_across_nodes &&
  			   page_to_nid(tree_page) != nid) {
  			/*
  			 * If tree_page has been migrated to another NUMA node,
  			 * it will be flushed out and put in the right unstable
  			 * tree next time: only merge with it when across_nodes.
  			 */
  			put_page(tree_page);
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1874
  		} else {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1875
  			*tree_pagep = tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1876
1877
1878
  			return tree_rmap_item;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1879
  	rmap_item->address |= UNSTABLE_FLAG;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1880
  	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1881
  	DO_NUMA(rmap_item->nid = nid);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1882
  	rb_link_node(&rmap_item->node, parent, new);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1883
  	rb_insert_color(&rmap_item->node, root);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1884

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
1885
  	ksm_pages_unshared++;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1886
1887
1888
1889
1890
1891
1892
1893
1894
  	return NULL;
  }
  
  /*
   * stable_tree_append - hang one more rmap_item off a stable tree node.
   *
   * @rmap_item: the item to add; it is marked with STABLE_FLAG and pointed
   *             at @stable_node.
   * @stable_node: the node whose hlist collects the rmap_items sharing one
   *               ksm page.
   * @max_page_sharing_bypass: when true, exceeding ksm_max_page_sharing does
   *                           not trigger the warning below.
   */
  static void stable_tree_append(struct rmap_item *rmap_item,
  			       struct stable_node *stable_node,
  			       bool max_page_sharing_bypass)
  {
  	/*
  	 * rmap won't find this mapping if the rmap_item is not inserted in
  	 * the right stable_node duplicate, and page migration could break
  	 * later if rmap breaks, so we may as well crash here.  Strictly only
  	 * rmap_hlist_len == STABLE_NODE_CHAIN needs checking, but any other
  	 * negative value seen here for the first time (rather than while
  	 * decreasing rmap_hlist_len) would be an underflow hinting at memory
  	 * corruption in the stable_node, so catch those too.
  	 */
  	BUG_ON(stable_node->rmap_hlist_len < 0);
  
  	stable_node->rmap_hlist_len++;
  	/* Possibly non fatal but unexpected overflow: only warn. */
  	WARN_ON_ONCE(!max_page_sharing_bypass &&
  		     stable_node->rmap_hlist_len > ksm_max_page_sharing);
  
  	rmap_item->head = stable_node;
  	rmap_item->address |= STABLE_FLAG;
  	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
  
  	/*
  	 * The item was added at the head, so a NULL next pointer means it is
  	 * the only entry on this node's list.
  	 */
  	if (!rmap_item->hlist.next)
  		ksm_pages_shared++;
  	else
  		ksm_pages_sharing++;
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1926
1927
1928
1929
   * cmp_and_merge_page - first see if page can be merged into the stable tree;
   * if not, compare checksum to previous and if it's the same, see if page can
   * be inserted into the unstable tree, or merged with a page already there and
   * both transferred to the stable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1930
1931
1932
1933
1934
1935
   *
   * @page: the page that we are searching identical page to.
   * @rmap_item: the reverse mapping into the virtual address of this page
   */
  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
  {
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
1936
  	struct mm_struct *mm = rmap_item->mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1937
  	struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1938
  	struct page *tree_page = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1939
  	struct stable_node *stable_node;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1940
  	struct page *kpage;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1941
1942
  	unsigned int checksum;
  	int err;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1943
  	bool max_page_sharing_bypass = false;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1944

4146d2d67   Hugh Dickins   ksm: make !merge_...
1945
1946
1947
  	stable_node = page_stable_node(page);
  	if (stable_node) {
  		if (stable_node->head != &migrate_nodes &&
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1948
1949
1950
  		    get_kpfn_nid(READ_ONCE(stable_node->kpfn)) !=
  		    NUMA(stable_node->nid)) {
  			stable_node_dup_del(stable_node);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1951
1952
1953
1954
1955
1956
  			stable_node->head = &migrate_nodes;
  			list_add(&stable_node->list, stable_node->head);
  		}
  		if (stable_node->head != &migrate_nodes &&
  		    rmap_item->head == stable_node)
  			return;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1957
1958
1959
1960
1961
1962
  		/*
  		 * If it's a KSM fork, allow it to go over the sharing limit
  		 * without warnings.
  		 */
  		if (!is_page_sharing_candidate(stable_node))
  			max_page_sharing_bypass = true;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1963
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1964
1965
  
  	/* We first start with searching the page inside the stable tree */
62b61f611   Hugh Dickins   ksm: memory hotre...
1966
  	kpage = stable_tree_search(page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1967
1968
1969
1970
1971
1972
  	if (kpage == page && rmap_item->head == stable_node) {
  		put_page(kpage);
  		return;
  	}
  
  	remove_rmap_item_from_tree(rmap_item);
62b61f611   Hugh Dickins   ksm: memory hotre...
1973
  	if (kpage) {
08beca44d   Hugh Dickins   ksm: stable_node ...
1974
  		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1975
1976
1977
1978
1979
  		if (!err) {
  			/*
  			 * The page was successfully merged:
  			 * add its rmap_item to the stable tree.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
1980
  			lock_page(kpage);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1981
1982
  			stable_tree_append(rmap_item, page_stable_node(kpage),
  					   max_page_sharing_bypass);
5ad646880   Hugh Dickins   ksm: let shared p...
1983
  			unlock_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1984
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1985
  		put_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1986
1987
1988
1989
  		return;
  	}
  
  	/*
4035c07a8   Hugh Dickins   ksm: take keyhole...
1990
1991
1992
1993
  	 * If the hash value of the page has changed from the last time
  	 * we calculated it, this page is changing frequently: therefore we
  	 * don't want to insert it in the unstable tree, and we don't want
  	 * to waste our time searching for something identical to it there.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1994
1995
1996
1997
1998
1999
  	 */
  	checksum = calc_checksum(page);
  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return;
  	}
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2000
2001
2002
2003
2004
2005
  	/*
  	 * Same checksum as an empty page. We attempt to merge it with the
  	 * appropriate zero page if the user enabled this via sysfs.
  	 */
  	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
  		struct vm_area_struct *vma;
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
2006
2007
  		down_read(&mm->mmap_sem);
  		vma = find_mergeable_vma(mm, rmap_item->address);
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2008
2009
  		err = try_to_merge_one_page(vma, page,
  					    ZERO_PAGE(rmap_item->address));
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
2010
  		up_read(&mm->mmap_sem);
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2011
2012
2013
2014
2015
2016
2017
  		/*
  		 * In case of failure, the page was not really empty, so we
  		 * need to continue. Otherwise we're done.
  		 */
  		if (!err)
  			return;
  	}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2018
2019
  	tree_rmap_item =
  		unstable_tree_search_insert(rmap_item, page, &tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2020
  	if (tree_rmap_item) {
763111d9f   Claudio Imbrenda   mm/ksm: fix inter...
2021
  		bool split;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2022
2023
  		kpage = try_to_merge_two_pages(rmap_item, page,
  						tree_rmap_item, tree_page);
763111d9f   Claudio Imbrenda   mm/ksm: fix inter...
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
  		/*
  		 * If both pages we tried to merge belong to the same compound
  		 * page, then we actually ended up increasing the reference
  		 * count of the same compound page twice, and split_huge_page
  		 * failed.
  		 * Here we set a flag if that happened, and we use it later to
  		 * try split_huge_page again. Since we call put_page right
  		 * afterwards, the reference count will be correct and
  		 * split_huge_page should succeed.
  		 */
  		split = PageTransCompound(page)
  			&& compound_head(page) == compound_head(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2036
  		put_page(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2037
  		if (kpage) {
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
2038
2039
2040
2041
  			/*
  			 * The pages were successfully merged: insert new
  			 * node in the stable tree and add both rmap_items.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
2042
  			lock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2043
2044
  			stable_node = stable_tree_insert(kpage);
  			if (stable_node) {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2045
2046
2047
2048
  				stable_tree_append(tree_rmap_item, stable_node,
  						   false);
  				stable_tree_append(rmap_item, stable_node,
  						   false);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2049
  			}
5ad646880   Hugh Dickins   ksm: let shared p...
2050
  			unlock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2051

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2052
2053
2054
2055
2056
2057
  			/*
  			 * If we fail to insert the page into the stable tree,
  			 * we will have 2 virtual addresses that are pointing
  			 * to a ksm page left outside the stable tree,
  			 * in which case we need to break_cow on both.
  			 */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2058
  			if (!stable_node) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2059
2060
  				break_cow(tree_rmap_item);
  				break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2061
  			}
763111d9f   Claudio Imbrenda   mm/ksm: fix inter...
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
  		} else if (split) {
  			/*
  			 * We are here if we tried to merge two pages and
  			 * failed because they both belonged to the same
  			 * compound page. We will split the page now, but no
  			 * merging will take place.
  			 * We do not want to add the cost of a full lock; if
  			 * the page is locked, it is better to skip it and
  			 * perhaps try again later.
  			 */
  			if (!trylock_page(page))
  				return;
  			split_huge_page(page);
  			unlock_page(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2076
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2077
2078
2079
2080
  	}
  }
  
  static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
2081
  					    struct rmap_item **rmap_list,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2082
2083
2084
  					    unsigned long addr)
  {
  	struct rmap_item *rmap_item;
6514d511d   Hugh Dickins   ksm: singly-linke...
2085
2086
  	while (*rmap_list) {
  		rmap_item = *rmap_list;
93d17715a   Hugh Dickins   ksm: three remove...
2087
  		if ((rmap_item->address & PAGE_MASK) == addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2088
  			return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2089
2090
  		if (rmap_item->address > addr)
  			break;
6514d511d   Hugh Dickins   ksm: singly-linke...
2091
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2092
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2093
2094
2095
2096
2097
2098
2099
2100
  		free_rmap_item(rmap_item);
  	}
  
  	rmap_item = alloc_rmap_item();
  	if (rmap_item) {
  		/* It has already been zeroed */
  		rmap_item->mm = mm_slot->mm;
  		rmap_item->address = addr;
6514d511d   Hugh Dickins   ksm: singly-linke...
2101
2102
  		rmap_item->rmap_list = *rmap_list;
  		*rmap_list = rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
  	}
  	return rmap_item;
  }
  
  static struct rmap_item *scan_get_next_rmap_item(struct page **page)
  {
  	struct mm_struct *mm;
  	struct mm_slot *slot;
  	struct vm_area_struct *vma;
  	struct rmap_item *rmap_item;
90bd6fd31   Petr Holasek   ksm: allow trees ...
2113
  	int nid;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2114
2115
2116
2117
2118
2119
  
  	if (list_empty(&ksm_mm_head.mm_list))
  		return NULL;
  
  	slot = ksm_scan.mm_slot;
  	if (slot == &ksm_mm_head) {
2919bfd07   Hugh Dickins   ksm: drain pageve...
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
  		/*
  		 * A number of pages can hang around indefinitely on per-cpu
  		 * pagevecs, raised page count preventing write_protect_page
  		 * from merging them.  Though it doesn't really matter much,
  		 * it is puzzling to see some stuck in pages_volatile until
  		 * other activity jostles them out, and they also prevented
  		 * LTP's KSM test from succeeding deterministically; so drain
  		 * them here (here rather than on entry to ksm_do_scan(),
  		 * so we don't IPI too often when pages_to_scan is set low).
  		 */
  		lru_add_drain_all();
4146d2d67   Hugh Dickins   ksm: make !merge_...
2131
2132
2133
2134
2135
2136
2137
  		/*
  		 * Whereas stale stable_nodes on the stable_tree itself
  		 * get pruned in the regular course of stable_tree_search(),
  		 * those moved out to the migrate_nodes list can accumulate:
  		 * so prune them once before each full scan.
  		 */
  		if (!ksm_merge_across_nodes) {
036404183   Geliang Tang   mm/ksm.c: use lis...
2138
  			struct stable_node *stable_node, *next;
4146d2d67   Hugh Dickins   ksm: make !merge_...
2139
  			struct page *page;
036404183   Geliang Tang   mm/ksm.c: use lis...
2140
2141
  			list_for_each_entry_safe(stable_node, next,
  						 &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
2142
2143
2144
2145
2146
2147
  				page = get_ksm_page(stable_node, false);
  				if (page)
  					put_page(page);
  				cond_resched();
  			}
  		}
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2148
  		for (nid = 0; nid < ksm_nr_node_ids; nid++)
90bd6fd31   Petr Holasek   ksm: allow trees ...
2149
  			root_unstable_tree[nid] = RB_ROOT;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2150
2151
2152
2153
2154
  
  		spin_lock(&ksm_mmlist_lock);
  		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
  		ksm_scan.mm_slot = slot;
  		spin_unlock(&ksm_mmlist_lock);
2b472611a   Hugh Dickins   ksm: fix NULL poi...
2155
2156
2157
2158
2159
2160
  		/*
  		 * Although we tested list_empty() above, a racing __ksm_exit
  		 * of the last mm on the list may have removed it since then.
  		 */
  		if (slot == &ksm_mm_head)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2161
2162
  next_mm:
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
2163
  		ksm_scan.rmap_list = &slot->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2164
2165
2166
2167
  	}
  
  	mm = slot->mm;
  	down_read(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2168
2169
2170
2171
2172
2173
  	if (ksm_test_exit(mm))
  		vma = NULL;
  	else
  		vma = find_vma(mm, ksm_scan.address);
  
  	for (; vma; vma = vma->vm_next) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2174
2175
2176
2177
2178
2179
2180
2181
  		if (!(vma->vm_flags & VM_MERGEABLE))
  			continue;
  		if (ksm_scan.address < vma->vm_start)
  			ksm_scan.address = vma->vm_start;
  		if (!vma->anon_vma)
  			ksm_scan.address = vma->vm_end;
  
  		while (ksm_scan.address < vma->vm_end) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
2182
2183
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2184
  			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
21ae5b017   Andrea Arcangeli   thp: skip transhu...
2185
2186
2187
2188
2189
  			if (IS_ERR_OR_NULL(*page)) {
  				ksm_scan.address += PAGE_SIZE;
  				cond_resched();
  				continue;
  			}
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
2190
  			if (PageAnon(*page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2191
2192
2193
  				flush_anon_page(vma, *page, ksm_scan.address);
  				flush_dcache_page(*page);
  				rmap_item = get_next_rmap_item(slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
2194
  					ksm_scan.rmap_list, ksm_scan.address);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2195
  				if (rmap_item) {
6514d511d   Hugh Dickins   ksm: singly-linke...
2196
2197
  					ksm_scan.rmap_list =
  							&rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2198
2199
2200
2201
2202
2203
  					ksm_scan.address += PAGE_SIZE;
  				} else
  					put_page(*page);
  				up_read(&mm->mmap_sem);
  				return rmap_item;
  			}
21ae5b017   Andrea Arcangeli   thp: skip transhu...
2204
  			put_page(*page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2205
2206
2207
2208
  			ksm_scan.address += PAGE_SIZE;
  			cond_resched();
  		}
  	}
9ba692948   Hugh Dickins   ksm: fix oom dead...
2209
2210
  	if (ksm_test_exit(mm)) {
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
2211
  		ksm_scan.rmap_list = &slot->rmap_list;
9ba692948   Hugh Dickins   ksm: fix oom dead...
2212
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2213
2214
2215
2216
  	/*
  	 * Nuke all the rmap_items that are above this current rmap:
  	 * because there were no VM_MERGEABLE vmas with such addresses.
  	 */
6514d511d   Hugh Dickins   ksm: singly-linke...
2217
  	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2218
2219
  
  	spin_lock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
2220
2221
2222
2223
2224
2225
2226
  	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
  						struct mm_slot, mm_list);
  	if (ksm_scan.address == 0) {
  		/*
  		 * We've completed a full scan of all vmas, holding mmap_sem
  		 * throughout, and found no VM_MERGEABLE: so do the same as
  		 * __ksm_exit does to remove this mm from all our lists now.
9ba692948   Hugh Dickins   ksm: fix oom dead...
2227
2228
2229
2230
  		 * This applies either when cleaning up after __ksm_exit
  		 * (but beware: we can reach here even before __ksm_exit),
  		 * or when all VM_MERGEABLE areas have been unmapped (and
  		 * mmap_sem then protects against race with MADV_MERGEABLE).
cd551f975   Hugh Dickins   ksm: distribute r...
2231
  		 */
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
2232
  		hash_del(&slot->link);
cd551f975   Hugh Dickins   ksm: distribute r...
2233
  		list_del(&slot->mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2234
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
2235
2236
  		free_mm_slot(slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2237
2238
2239
  		up_read(&mm->mmap_sem);
  		mmdrop(mm);
  	} else {
9ba692948   Hugh Dickins   ksm: fix oom dead...
2240
  		up_read(&mm->mmap_sem);
7496fea9a   Zhou Chengming   ksm: fix conflict...
2241
2242
2243
2244
2245
2246
2247
2248
  		/*
  		 * up_read(&mm->mmap_sem) first because after
  		 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
  		 * already have been freed under us by __ksm_exit()
  		 * because the "mm_slot" is still hashed and
  		 * ksm_scan.mm_slot doesn't point to it anymore.
  		 */
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
2249
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2250
2251
  
  	/* Repeat until we've completed scanning the whole list */
cd551f975   Hugh Dickins   ksm: distribute r...
2252
  	slot = ksm_scan.mm_slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2253
2254
  	if (slot != &ksm_mm_head)
  		goto next_mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
  	ksm_scan.seqnr++;
  	return NULL;
  }
  
  /**
   * ksm_do_scan  - the ksm scanner main worker function.
   * @scan_npages - number of pages we want to scan before we return.
   */
  static void ksm_do_scan(unsigned int scan_npages)
  {
  	struct rmap_item *rmap_item;
22eccdd7d   Dan Carpenter   ksm: check for ER...
2266
  	struct page *uninitialized_var(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2267

878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2268
  	while (scan_npages-- && likely(!freezing(current))) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2269
2270
2271
2272
  		cond_resched();
  		rmap_item = scan_get_next_rmap_item(&page);
  		if (!rmap_item)
  			return;
4146d2d67   Hugh Dickins   ksm: make !merge_...
2273
  		cmp_and_merge_page(page, rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2274
2275
2276
  		put_page(page);
  	}
  }
6e1583842   Hugh Dickins   ksm: keep quiet w...
2277
2278
2279
2280
  /*
   * ksmd_should_run - nonzero when KSM_RUN_MERGE is set in ksm_run and at
   * least one mm is on the ksm_mm_head list.
   */
  static int ksmd_should_run(void)
  {
  	if (!(ksm_run & KSM_RUN_MERGE))
  		return 0;
  	return !list_empty(&ksm_mm_head.mm_list);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2281
2282
  static int ksm_scan_thread(void *nothing)
  {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2283
  	set_freezable();
339aa6246   Izik Eidus   ksm: change ksm n...
2284
  	set_user_nice(current, 5);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2285
2286
  
  	while (!kthread_should_stop()) {
6e1583842   Hugh Dickins   ksm: keep quiet w...
2287
  		mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2288
  		wait_while_offlining();
6e1583842   Hugh Dickins   ksm: keep quiet w...
2289
  		if (ksmd_should_run())
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2290
  			ksm_do_scan(ksm_thread_pages_to_scan);
6e1583842   Hugh Dickins   ksm: keep quiet w...
2291
  		mutex_unlock(&ksm_thread_mutex);
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2292
  		try_to_freeze();
6e1583842   Hugh Dickins   ksm: keep quiet w...
2293
  		if (ksmd_should_run()) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2294
2295
2296
  			schedule_timeout_interruptible(
  				msecs_to_jiffies(ksm_thread_sleep_millisecs));
  		} else {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2297
  			wait_event_freezable(ksm_thread_wait,
6e1583842   Hugh Dickins   ksm: keep quiet w...
2298
  				ksmd_should_run() || kthread_should_stop());
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2299
2300
2301
2302
  		}
  	}
  	return 0;
  }
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2303
2304
2305
2306
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  		unsigned long end, int advice, unsigned long *vm_flags)
  {
  	struct mm_struct *mm = vma->vm_mm;
d952b7913   Hugh Dickins   ksm: fix endless ...
2307
  	int err;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2308
2309
2310
2311
2312
2313
2314
2315
  
  	switch (advice) {
  	case MADV_MERGEABLE:
  		/*
  		 * Be somewhat over-protective for now!
  		 */
  		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
  				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
0661a3361   Kirill A. Shutemov   mm: remove rest u...
2316
  				 VM_HUGETLB | VM_MIXEDMAP))
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2317
  			return 0;		/* just ignore the advice */
cc2383ec0   Konstantin Khlebnikov   mm: introduce arc...
2318
2319
2320
2321
  #ifdef VM_SAO
  		if (*vm_flags & VM_SAO)
  			return 0;
  #endif
d952b7913   Hugh Dickins   ksm: fix endless ...
2322
2323
2324
2325
2326
  		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
  			err = __ksm_enter(mm);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2327
2328
2329
2330
2331
2332
2333
  
  		*vm_flags |= VM_MERGEABLE;
  		break;
  
  	case MADV_UNMERGEABLE:
  		if (!(*vm_flags & VM_MERGEABLE))
  			return 0;		/* just ignore the advice */
d952b7913   Hugh Dickins   ksm: fix endless ...
2334
2335
2336
2337
2338
  		if (vma->anon_vma) {
  			err = unmerge_ksm_pages(vma, start, end);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
  
  		*vm_flags &= ~VM_MERGEABLE;
  		break;
  	}
  
  	return 0;
  }
  
  int __ksm_enter(struct mm_struct *mm)
  {
6e1583842   Hugh Dickins   ksm: keep quiet w...
2349
2350
2351
2352
  	struct mm_slot *mm_slot;
  	int needs_wakeup;
  
  	mm_slot = alloc_mm_slot();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2353
2354
  	if (!mm_slot)
  		return -ENOMEM;
6e1583842   Hugh Dickins   ksm: keep quiet w...
2355
2356
  	/* Check ksm_run too?  Would need tighter locking */
  	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2357
2358
2359
  	spin_lock(&ksm_mmlist_lock);
  	insert_to_mm_slots_hash(mm, mm_slot);
  	/*
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2360
2361
  	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
  	 * insert just behind the scanning cursor, to let the area settle
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2362
2363
  	 * down a little; when fork is followed by immediate exec, we don't
  	 * want ksmd to waste time setting up and tearing down an rmap_list.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2364
2365
2366
2367
  	 *
  	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
  	 * scanning cursor, otherwise KSM pages in newly forked mms will be
  	 * missed: then we might as well insert at the end of the list.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2368
  	 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2369
2370
2371
2372
  	if (ksm_run & KSM_RUN_UNMERGE)
  		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
  	else
  		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2373
  	spin_unlock(&ksm_mmlist_lock);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2374
  	set_bit(MMF_VM_MERGEABLE, &mm->flags);
f1f100764   Vegard Nossum   mm: add new mmgra...
2375
  	mmgrab(mm);
6e1583842   Hugh Dickins   ksm: keep quiet w...
2376
2377
2378
  
  	if (needs_wakeup)
  		wake_up_interruptible(&ksm_thread_wait);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2379
2380
  	return 0;
  }
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
2381
  void __ksm_exit(struct mm_struct *mm)
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2382
  {
cd551f975   Hugh Dickins   ksm: distribute r...
2383
  	struct mm_slot *mm_slot;
9ba692948   Hugh Dickins   ksm: fix oom dead...
2384
  	int easy_to_free = 0;
cd551f975   Hugh Dickins   ksm: distribute r...
2385

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2386
  	/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
2387
2388
2389
2390
2391
2392
  	 * This process is exiting: if it's straightforward (as is the
  	 * case when ksmd was never running), free mm_slot immediately.
  	 * But if it's at the cursor or has rmap_items linked to it, use
  	 * mmap_sem to synchronize with any break_cows before pagetables
  	 * are freed, and leave the mm_slot on the list for ksmd to free.
  	 * Beware: ksm may already have noticed it exiting and freed the slot.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2393
  	 */
9ba692948   Hugh Dickins   ksm: fix oom dead...
2394

cd551f975   Hugh Dickins   ksm: distribute r...
2395
2396
  	spin_lock(&ksm_mmlist_lock);
  	mm_slot = get_mm_slot(mm);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2397
  	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
6514d511d   Hugh Dickins   ksm: singly-linke...
2398
  		if (!mm_slot->rmap_list) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
2399
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2400
2401
2402
2403
2404
2405
  			list_del(&mm_slot->mm_list);
  			easy_to_free = 1;
  		} else {
  			list_move(&mm_slot->mm_list,
  				  &ksm_scan.mm_slot->mm_list);
  		}
cd551f975   Hugh Dickins   ksm: distribute r...
2406
  	}
cd551f975   Hugh Dickins   ksm: distribute r...
2407
  	spin_unlock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2408
2409
2410
2411
2412
  	if (easy_to_free) {
  		free_mm_slot(mm_slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		mmdrop(mm);
  	} else if (mm_slot) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
2413
2414
  		down_write(&mm->mmap_sem);
  		up_write(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2415
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2416
  }
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2417
  struct page *ksm_might_need_to_copy(struct page *page,
5ad646880   Hugh Dickins   ksm: let shared p...
2418
2419
  			struct vm_area_struct *vma, unsigned long address)
  {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2420
  	struct anon_vma *anon_vma = page_anon_vma(page);
5ad646880   Hugh Dickins   ksm: let shared p...
2421
  	struct page *new_page;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
  	if (PageKsm(page)) {
  		if (page_stable_node(page) &&
  		    !(ksm_run & KSM_RUN_UNMERGE))
  			return page;	/* no need to copy it */
  	} else if (!anon_vma) {
  		return page;		/* no need to copy it */
  	} else if (anon_vma->root == vma->anon_vma->root &&
  		 page->index == linear_page_index(vma, address)) {
  		return page;		/* still no need to copy it */
  	}
  	if (!PageUptodate(page))
  		return page;		/* let do_swap_page report the error */
5ad646880   Hugh Dickins   ksm: let shared p...
2434
2435
2436
2437
2438
2439
  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
  	if (new_page) {
  		copy_user_highpage(new_page, page, address, vma);
  
  		SetPageDirty(new_page);
  		__SetPageUptodate(new_page);
48c935ad8   Kirill A. Shutemov   page-flags: defin...
2440
  		__SetPageLocked(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
2441
  	}
5ad646880   Hugh Dickins   ksm: let shared p...
2442
2443
  	return new_page;
  }
1df631ae1   Minchan Kim   mm: make rmap_wal...
2444
  void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2445
2446
  {
  	struct stable_node *stable_node;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2447
  	struct rmap_item *rmap_item;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2448
  	int search_new_forks = 0;
309381fea   Sasha Levin   mm: dump page whe...
2449
  	VM_BUG_ON_PAGE(!PageKsm(page), page);
9f32624be   Joonsoo Kim   mm/rmap: use rmap...
2450
2451
2452
2453
2454
  
  	/*
  	 * Rely on the page lock to protect against concurrent modifications
  	 * to that page's node of the stable tree.
  	 */
309381fea   Sasha Levin   mm: dump page whe...
2455
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2456
2457
2458
  
  	stable_node = page_stable_node(page);
  	if (!stable_node)
1df631ae1   Minchan Kim   mm: make rmap_wal...
2459
  		return;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2460
  again:
b67bfe0d4   Sasha Levin   hlist: drop the n...
2461
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2462
  		struct anon_vma *anon_vma = rmap_item->anon_vma;
5beb49305   Rik van Riel   mm: change anon_v...
2463
  		struct anon_vma_chain *vmac;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2464
  		struct vm_area_struct *vma;
ad12695f1   Andrea Arcangeli   ksm: add cond_res...
2465
  		cond_resched();
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
2466
  		anon_vma_lock_read(anon_vma);
bf181b9f9   Michel Lespinasse   mm anon rmap: rep...
2467
2468
  		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
  					       0, ULONG_MAX) {
6f2302848   Jia He   mm/ksm.c: ignore ...
2469
  			unsigned long addr;
ad12695f1   Andrea Arcangeli   ksm: add cond_res...
2470
  			cond_resched();
5beb49305   Rik van Riel   mm: change anon_v...
2471
  			vma = vmac->vma;
6f2302848   Jia He   mm/ksm.c: ignore ...
2472
2473
2474
2475
2476
  
  			/* Ignore the stable/unstable/sqnr flags */
  			addr = rmap_item->address & ~KSM_FLAG_MASK;
  
  			if (addr < vma->vm_start || addr >= vma->vm_end)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2477
2478
2479
2480
2481
2482
2483
2484
2485
  				continue;
  			/*
  			 * Initially we examine only the vma which covers this
  			 * rmap_item; but later, if there is still work to do,
  			 * we examine covering vmas in other mms: in case they
  			 * were forked from the original since ksmd passed.
  			 */
  			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
  				continue;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
2486
2487
  			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  				continue;
6f2302848   Jia He   mm/ksm.c: ignore ...
2488
  			if (!rwc->rmap_one(page, vma, addr, rwc->arg)) {
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
2489
  				anon_vma_unlock_read(anon_vma);
1df631ae1   Minchan Kim   mm: make rmap_wal...
2490
  				return;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2491
  			}
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
2492
2493
  			if (rwc->done && rwc->done(page)) {
  				anon_vma_unlock_read(anon_vma);
1df631ae1   Minchan Kim   mm: make rmap_wal...
2494
  				return;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
2495
  			}
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2496
  		}
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
2497
  		anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2498
2499
2500
  	}
  	if (!search_new_forks++)
  		goto again;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2501
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
2502
  #ifdef CONFIG_MIGRATION
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2503
2504
2505
  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
  {
  	struct stable_node *stable_node;
309381fea   Sasha Levin   mm: dump page whe...
2506
2507
2508
  	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
  	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
  	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2509
2510
2511
  
  	stable_node = page_stable_node(newpage);
  	if (stable_node) {
309381fea   Sasha Levin   mm: dump page whe...
2512
  		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
2513
  		stable_node->kpfn = page_to_pfn(newpage);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
2514
2515
2516
2517
2518
2519
2520
2521
  		/*
  		 * newpage->mapping was set in advance; now we need smp_wmb()
  		 * to make sure that the new stable_node->kpfn is visible
  		 * to get_ksm_page() before it can see that oldpage->mapping
  		 * has gone stale (or that PageSwapCache has been cleared).
  		 */
  		smp_wmb();
  		set_page_stable_node(oldpage, NULL);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2522
2523
2524
  	}
  }
  #endif /* CONFIG_MIGRATION */
62b61f611   Hugh Dickins   ksm: memory hotre...
2525
  #ifdef CONFIG_MEMORY_HOTREMOVE
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2526
2527
2528
2529
2530
  static void wait_while_offlining(void)
  {
  	while (ksm_run & KSM_RUN_OFFLINE) {
  		mutex_unlock(&ksm_thread_mutex);
  		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
743162013   NeilBrown   sched: Remove pro...
2531
  			    TASK_UNINTERRUPTIBLE);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2532
2533
2534
  		mutex_lock(&ksm_thread_mutex);
  	}
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
  /*
   * Remove @stable_node from the stable tree if its kpfn falls inside
   * [@start_pfn, @end_pfn).  Returns true when the node was removed.
   */
  static bool stable_node_dup_remove_range(struct stable_node *stable_node,
  					 unsigned long start_pfn,
  					 unsigned long end_pfn)
  {
  	/* Outside the range: leave the node alone. */
  	if (stable_node->kpfn < start_pfn || stable_node->kpfn >= end_pfn)
  		return false;

  	/*
  	 * Don't get_ksm_page, page has already gone:
  	 * which is why we keep kpfn instead of page*
  	 */
  	remove_node_from_stable_tree(stable_node);
  	return true;
  }
  
  static bool stable_node_chain_remove_range(struct stable_node *stable_node,
  					   unsigned long start_pfn,
  					   unsigned long end_pfn,
  					   struct rb_root *root)
  {
  	struct stable_node *dup;
  	struct hlist_node *hlist_safe;
  
  	if (!is_stable_node_chain(stable_node)) {
  		VM_BUG_ON(is_stable_node_dup(stable_node));
  		return stable_node_dup_remove_range(stable_node, start_pfn,
  						    end_pfn);
  	}
  
  	hlist_for_each_entry_safe(dup, hlist_safe,
  				  &stable_node->hlist, hlist_dup) {
  		VM_BUG_ON(!is_stable_node_dup(dup));
  		stable_node_dup_remove_range(dup, start_pfn, end_pfn);
  	}
  	if (hlist_empty(&stable_node->hlist)) {
  		free_stable_node_chain(stable_node, root);
  		return true; /* notify caller that tree was rebalanced */
  	} else
  		return false;
  }
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2576
2577
  static void ksm_check_stable_tree(unsigned long start_pfn,
  				  unsigned long end_pfn)
62b61f611   Hugh Dickins   ksm: memory hotre...
2578
  {
036404183   Geliang Tang   mm/ksm.c: use lis...
2579
  	struct stable_node *stable_node, *next;
62b61f611   Hugh Dickins   ksm: memory hotre...
2580
  	struct rb_node *node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
2581
  	int nid;
62b61f611   Hugh Dickins   ksm: memory hotre...
2582

ef53d16cd   Hugh Dickins   ksm: allocate roo...
2583
2584
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2585
  		while (node) {
90bd6fd31   Petr Holasek   ksm: allow trees ...
2586
  			stable_node = rb_entry(node, struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2587
2588
2589
2590
  			if (stable_node_chain_remove_range(stable_node,
  							   start_pfn, end_pfn,
  							   root_stable_tree +
  							   nid))
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2591
  				node = rb_first(root_stable_tree + nid);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2592
  			else
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2593
2594
  				node = rb_next(node);
  			cond_resched();
90bd6fd31   Petr Holasek   ksm: allow trees ...
2595
  		}
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2596
  	}
036404183   Geliang Tang   mm/ksm.c: use lis...
2597
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
2598
2599
2600
2601
2602
  		if (stable_node->kpfn >= start_pfn &&
  		    stable_node->kpfn < end_pfn)
  			remove_node_from_stable_tree(stable_node);
  		cond_resched();
  	}
62b61f611   Hugh Dickins   ksm: memory hotre...
2603
2604
2605
2606
2607
2608
  }
  
  static int ksm_memory_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
62b61f611   Hugh Dickins   ksm: memory hotre...
2609
2610
2611
2612
  
  	switch (action) {
  	case MEM_GOING_OFFLINE:
  		/*
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2613
2614
2615
2616
2617
  		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
  		 * and remove_all_stable_nodes() while memory is going offline:
  		 * it is unsafe for them to touch the stable tree at this time.
  		 * But unmerge_ksm_pages(), rmap lookups and other entry points
  		 * which do not need the ksm_thread_mutex are all safe.
62b61f611   Hugh Dickins   ksm: memory hotre...
2618
  		 */
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2619
2620
2621
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run |= KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
62b61f611   Hugh Dickins   ksm: memory hotre...
2622
2623
2624
2625
2626
2627
  		break;
  
  	case MEM_OFFLINE:
  		/*
  		 * Most of the work is done by page migration; but there might
  		 * be a few stable_nodes left over, still pointing to struct
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2628
2629
2630
  		 * pages which have been offlined: prune those from the tree,
  		 * otherwise get_ksm_page() might later try to access a
  		 * non-existent struct page.
62b61f611   Hugh Dickins   ksm: memory hotre...
2631
  		 */
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2632
2633
  		ksm_check_stable_tree(mn->start_pfn,
  				      mn->start_pfn + mn->nr_pages);
62b61f611   Hugh Dickins   ksm: memory hotre...
2634
2635
2636
  		/* fallthrough */
  
  	case MEM_CANCEL_OFFLINE:
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2637
2638
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run &= ~KSM_RUN_OFFLINE;
62b61f611   Hugh Dickins   ksm: memory hotre...
2639
  		mutex_unlock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2640
2641
2642
  
  		smp_mb();	/* wake_up_bit advises this */
  		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
62b61f611   Hugh Dickins   ksm: memory hotre...
2643
2644
2645
2646
  		break;
  	}
  	return NOTIFY_OK;
  }
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2647
2648
2649
2650
  #else
  /*
   * Variant used when CONFIG_MEMORY_HOTREMOVE is not set: there is nothing
   * to wait for, so this is an empty stub.
   */
  static void wait_while_offlining(void)
  {
  }
62b61f611   Hugh Dickins   ksm: memory hotre...
2651
  #endif /* CONFIG_MEMORY_HOTREMOVE */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2652
2653
2654
2655
  #ifdef CONFIG_SYSFS
  /*
   * This all compiles without CONFIG_SYSFS, but is a waste of space.
   */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
  /*
   * Each macro defines a static struct kobj_attribute called <_name>_attr.
   * KSM_ATTR_RO builds it with __ATTR_RO(_name); KSM_ATTR builds it with
   * mode 0644 and the <_name>_show / <_name>_store handlers.
   */
  #define KSM_ATTR_RO(_name) \
  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KSM_ATTR(_name) \
  	static struct kobj_attribute _name##_attr = \
  		__ATTR(_name, 0644, _name##_show, _name##_store)
  
  static ssize_t sleep_millisecs_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_sleep_millisecs);
  }
  
  /*
   * Store handler for the sleep_millisecs attribute.  Parses @buf as a
   * base-10 unsigned long and assigns it to ksm_thread_sleep_millisecs.
   * Returns @count on success, or -EINVAL when parsing fails or the value
   * exceeds UINT_MAX.
   */
  static ssize_t sleep_millisecs_store(struct kobject *kobj,
  				     struct kobj_attribute *attr,
  				     const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;

  	err = kstrtoul(buf, 10, &msecs);
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;

  	ksm_thread_sleep_millisecs = msecs;

  	return count;
  }
  KSM_ATTR(sleep_millisecs);
  
  static ssize_t pages_to_scan_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_pages_to_scan);
  }
  
  /*
   * Store handler for the pages_to_scan attribute.  Parses @buf as a
   * base-10 unsigned long and assigns it to ksm_thread_pages_to_scan.
   * Returns @count on success, or -EINVAL when parsing fails or the value
   * exceeds UINT_MAX.
   */
  static ssize_t pages_to_scan_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long nr_pages;

  	err = kstrtoul(buf, 10, &nr_pages);
  	if (err || nr_pages > UINT_MAX)
  		return -EINVAL;

  	ksm_thread_pages_to_scan = nr_pages;

  	return count;
  }
  KSM_ATTR(pages_to_scan);
  
  static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
  			char *buf)
  {
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2711
2712
  	return sprintf(buf, "%lu
  ", ksm_run);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2713
2714
2715
2716
2717
2718
2719
  }
  
  /*
   * Store handler for the run attribute.  Parses @buf as a base-10
   * unsigned long; values above KSM_RUN_UNMERGE (or unparsable input)
   * yield -EINVAL.  Under ksm_thread_mutex, and after any offlining has
   * finished, ksm_run is updated.  Switching to KSM_RUN_UNMERGE also calls
   * unmerge_and_remove_all_rmap_items(); if that fails, ksm_run is reset
   * to KSM_RUN_STOP and the error is returned instead of @count.  ksmd's
   * wait queue is woken when the written value has KSM_RUN_MERGE set.
   */
  static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
  			 const char *buf, size_t count)
  {
  	int err;
  	unsigned long flags;

  	err = kstrtoul(buf, 10, &flags);
  	if (err || flags > UINT_MAX)
  		return -EINVAL;
  	if (flags > KSM_RUN_UNMERGE)
  		return -EINVAL;

  	/*
  	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
  	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
  	 * breaking COW to free the pages_shared (but leaves mm_slots
  	 * on the list for when ksmd may be set running again).
  	 */

  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_run != flags) {
  		ksm_run = flags;
  		if (flags & KSM_RUN_UNMERGE) {
  			set_current_oom_origin();
  			err = unmerge_and_remove_all_rmap_items();
  			clear_current_oom_origin();
  			if (err) {
  				ksm_run = KSM_RUN_STOP;
  				count = err;
  			}
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);

  	if (flags & KSM_RUN_MERGE)
  		wake_up_interruptible(&ksm_thread_wait);

  	return count;
  }
  KSM_ATTR(run);
90bd6fd31   Petr Holasek   ksm: allow trees ...
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
  #ifdef CONFIG_NUMA
  static ssize_t merge_across_nodes_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_merge_across_nodes);
  }
  
  /*
   * Store handler for the merge_across_nodes attribute.  Accepts 0 or 1.
   * A changed value is only applied when ksm_pages_shared is zero and
   * remove_all_stable_nodes() returns zero; otherwise -EBUSY is returned.
   * The first switch away from the single default tree allocates one
   * array holding nr_node_ids stable roots followed by nr_node_ids
   * unstable roots.  Returns @count on success or a negative errno.
   */
  static ssize_t merge_across_nodes_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long knob;

  	err = kstrtoul(buf, 10, &knob);
  	if (err)
  		return err;
  	if (knob > 1)
  		return -EINVAL;

  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_merge_across_nodes != knob) {
  		if (ksm_pages_shared || remove_all_stable_nodes())
  			err = -EBUSY;
  		else if (root_stable_tree == one_stable_tree) {
  			struct rb_root *buf;
  			/*
  			 * This is the first time that we switch away from the
  			 * default of merging across nodes: must now allocate
  			 * a buffer to hold as many roots as may be needed.
  			 * Allocate stable and unstable together:
  			 * MAXSMP NODES_SHIFT 10 will use 16kB.
  			 */
  			buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf),
  				      GFP_KERNEL);
  			/* Let us assume that RB_ROOT is NULL is zero */
  			if (!buf)
  				err = -ENOMEM;
  			else {
  				root_stable_tree = buf;
  				root_unstable_tree = buf + nr_node_ids;
  				/* Stable tree is empty but not the unstable */
  				root_unstable_tree[0] = one_unstable_tree[0];
  			}
  		}
  		if (!err) {
  			ksm_merge_across_nodes = knob;
  			ksm_nr_node_ids = knob ? 1 : nr_node_ids;
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);

  	return err ? err : count;
  }
  KSM_ATTR(merge_across_nodes);
  #endif
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
  static ssize_t use_zero_pages_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_use_zero_pages);
  }
  /*
   * Store handler for the use_zero_pages attribute.  Parses @buf with
   * kstrtobool() and assigns the result to ksm_use_zero_pages.  Returns
   * @count on success, -EINVAL if the input could not be parsed.
   */
  static ssize_t use_zero_pages_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	bool enable;

  	if (kstrtobool(buf, &enable))
  		return -EINVAL;

  	ksm_use_zero_pages = enable;
  	return count;
  }
  KSM_ATTR(use_zero_pages);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
  static ssize_t max_page_sharing_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_max_page_sharing);
  }
  
  /*
   * Store handler for the max_page_sharing attribute.  Values below 2 are
   * rejected with -EINVAL.  Writing the current value is a no-op.  A new
   * value is only applied when ksm_pages_shared is zero and
   * remove_all_stable_nodes() returns zero; otherwise -EBUSY is returned.
   * Returns @count on success.
   */
  static ssize_t max_page_sharing_store(struct kobject *kobj,
  				      struct kobj_attribute *attr,
  				      const char *buf, size_t count)
  {
  	int limit;
  	int ret = kstrtoint(buf, 10, &limit);

  	if (ret)
  		return ret;

  	/*
  	 * When a KSM page is created it is shared by 2 mappings. This
  	 * being a signed comparison, it implicitly verifies it's not
  	 * negative.
  	 */
  	if (limit < 2)
  		return -EINVAL;

  	/* Unlocked fast path: nothing to change. */
  	if (READ_ONCE(ksm_max_page_sharing) == limit)
  		return count;

  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_max_page_sharing != limit) {
  		if (!ksm_pages_shared && !remove_all_stable_nodes())
  			ksm_max_page_sharing = limit;
  		else
  			ret = -EBUSY;
  	}
  	mutex_unlock(&ksm_thread_mutex);

  	if (ret)
  		return ret;
  	return count;
  }
  KSM_ATTR(max_page_sharing);
b40282603   Hugh Dickins   ksm: rename kerne...
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
  static ssize_t pages_shared_show(struct kobject *kobj,
  				 struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_shared);
  }
  KSM_ATTR_RO(pages_shared);
  
  static ssize_t pages_sharing_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
e178dfde3   Hugh Dickins   ksm: move pages_s...
2887
2888
  	return sprintf(buf, "%lu
  ", ksm_pages_sharing);
b40282603   Hugh Dickins   ksm: rename kerne...
2889
2890
  }
  KSM_ATTR_RO(pages_sharing);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
  static ssize_t pages_unshared_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_unshared);
  }
  KSM_ATTR_RO(pages_unshared);
  
  /*
   * Show handler for the read-only pages_volatile attribute.  The value is
   * derived as ksm_rmap_items minus the shared, sharing and unshared
   * counters, clamped at zero, and formatted into @buf.
   */
  static ssize_t pages_volatile_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	long ksm_pages_volatile;

  	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
  				- ksm_pages_sharing - ksm_pages_unshared;
  	/*
  	 * It was not worth any locking to calculate that statistic,
  	 * but it might therefore sometimes be negative: conceal that.
  	 */
  	if (ksm_pages_volatile < 0)
  		ksm_pages_volatile = 0;
  	return sprintf(buf, "%ld\n", ksm_pages_volatile);
  }
  KSM_ATTR_RO(pages_volatile);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
  static ssize_t stable_node_dups_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_stable_node_dups);
  }
  KSM_ATTR_RO(stable_node_dups);
  
  static ssize_t stable_node_chains_show(struct kobject *kobj,
  				       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_stable_node_chains);
  }
  KSM_ATTR_RO(stable_node_chains);
  
  static ssize_t
  stable_node_chains_prune_millisecs_show(struct kobject *kobj,
  					struct kobj_attribute *attr,
  					char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_stable_node_chains_prune_millisecs);
  }
  
  /*
   * Store handler for the stable_node_chains_prune_millisecs attribute.
   * Parses @buf as a base-10 unsigned long and assigns it to
   * ksm_stable_node_chains_prune_millisecs.  Returns @count on success,
   * or -EINVAL when parsing fails or the value exceeds UINT_MAX.
   */
  static ssize_t
  stable_node_chains_prune_millisecs_store(struct kobject *kobj,
  					 struct kobj_attribute *attr,
  					 const char *buf, size_t count)
  {
  	unsigned long interval;

  	/* interval is only inspected when the parse succeeded. */
  	if (kstrtoul(buf, 10, &interval) || interval > UINT_MAX)
  		return -EINVAL;

  	ksm_stable_node_chains_prune_millisecs = interval;
  	return count;
  }
  KSM_ATTR(stable_node_chains_prune_millisecs);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
2958
2959
2960
2961
2962
2963
2964
  static ssize_t full_scans_show(struct kobject *kobj,
  			       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_scan.seqnr);
  }
  KSM_ATTR_RO(full_scans);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2965
2966
2967
2968
  static struct attribute *ksm_attrs[] = {
  	&sleep_millisecs_attr.attr,
  	&pages_to_scan_attr.attr,
  	&run_attr.attr,
b40282603   Hugh Dickins   ksm: rename kerne...
2969
2970
  	&pages_shared_attr.attr,
  	&pages_sharing_attr.attr,
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
2971
2972
2973
  	&pages_unshared_attr.attr,
  	&pages_volatile_attr.attr,
  	&full_scans_attr.attr,
90bd6fd31   Petr Holasek   ksm: allow trees ...
2974
2975
2976
  #ifdef CONFIG_NUMA
  	&merge_across_nodes_attr.attr,
  #endif
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2977
2978
2979
2980
  	&max_page_sharing_attr.attr,
  	&stable_node_chains_attr.attr,
  	&stable_node_dups_attr.attr,
  	&stable_node_chains_prune_millisecs_attr.attr,
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2981
  	&use_zero_pages_attr.attr,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2982
2983
  	NULL,
  };
f907c26a9   Arvind Yadav   mm/ksm.c: constif...
2984
  static const struct attribute_group ksm_attr_group = {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2985
2986
2987
  	.attrs = ksm_attrs,
  	.name = "ksm",
  };
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2988
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2989
2990
2991
2992
2993
  
  /*
   * ksm_init - subsystem initcall that brings KSM up.
   *
   * Computes the zero-page checksum, sets up the slab caches, starts the
   * "ksmd" kthread and, with CONFIG_SYSFS, registers the attribute group
   * under mm_kobj (without sysfs, ksm_run is forced to KSM_RUN_MERGE).
   * With CONFIG_MEMORY_HOTREMOVE the hotplug notifier is registered too.
   *
   * Returns 0 on success.  On failure returns the error from
   * ksm_slab_init(), kthread_run() or sysfs_create_group(); the slab
   * caches are freed if the failure happened after they were created, and
   * the kthread is stopped if sysfs registration is what failed.
   */
  static int __init ksm_init(void)
  {
  	struct task_struct *ksm_thread;
  	int err;

  	/* The correct value depends on page size and endianness */
  	zero_checksum = calc_checksum(ZERO_PAGE(0));
  	/* Default to false for backwards compatibility */
  	ksm_use_zero_pages = false;

  	err = ksm_slab_init();
  	if (err)
  		goto out;

  	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
  	if (IS_ERR(ksm_thread)) {
  		pr_err("ksm: creating kthread failed\n");
  		err = PTR_ERR(ksm_thread);
  		goto out_free;
  	}

  #ifdef CONFIG_SYSFS
  	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
  	if (err) {
  		pr_err("ksm: register sysfs failed\n");
  		kthread_stop(ksm_thread);
  		goto out_free;
  	}
  #else
  	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */

  #endif /* CONFIG_SYSFS */

  #ifdef CONFIG_MEMORY_HOTREMOVE
  	/* There is no significance to this priority 100 */
  	hotplug_memory_notifier(ksm_memory_callback, 100);
  #endif
  	return 0;

  out_free:
  	ksm_slab_free();
  out:
  	return err;
  }
  subsys_initcall(ksm_init);