mm/ksm.c
  /*
   * Memory merging support.
   *
   * This code enables dynamic sharing of identical pages found in different
   * memory areas, even if they are not shared by fork()
   *
   * Copyright (C) 2008-2009 Red Hat, Inc.
   * Authors:
   *	Izik Eidus
   *	Andrea Arcangeli
   *	Chris Wright
   *	Hugh Dickins
   *
   * This work is licensed under the terms of the GNU GPL, version 2.
   */
  
  #include <linux/errno.h>
  #include <linux/mm.h>
  #include <linux/fs.h>
  #include <linux/mman.h>
  #include <linux/sched.h>
  #include <linux/sched/mm.h>
  #include <linux/sched/coredump.h>
  #include <linux/rwsem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/spinlock.h>
  #include <linux/jhash.h>
  #include <linux/delay.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>
  #include <linux/slab.h>
  #include <linux/rbtree.h>
  #include <linux/memory.h>
  #include <linux/mmu_notifier.h>
  #include <linux/swap.h>
  #include <linux/ksm.h>
  #include <linux/hashtable.h>
  #include <linux/freezer.h>
  #include <linux/oom.h>
  #include <linux/numa.h>

  #include <asm/tlbflush.h>
  #include "internal.h"

  #ifdef CONFIG_NUMA
  #define NUMA(x)		(x)
  #define DO_NUMA(x)	do { (x); } while (0)
  #else
  #define NUMA(x)		(0)
  #define DO_NUMA(x)	do { } while (0)
  #endif

  /**
   * DOC: Overview
   *
   * A few notes about the KSM scanning process,
   * to make it easier to understand the data structures below:
   *
   * In order to reduce excessive scanning, KSM sorts the memory pages by their
   * contents into a data structure that holds pointers to the pages' locations.
   *
   * Since the contents of the pages may change at any moment, KSM cannot just
   * insert the pages into a normal sorted tree and expect it to find anything.
   * Therefore KSM uses two data structures - the stable and the unstable tree.
   *
   * The stable tree holds pointers to all the merged pages (ksm pages), sorted
   * by their contents.  Because each such page is write-protected, searching on
   * this tree is fully assured to be working (except when pages are unmapped),
   * and therefore this tree is called the stable tree.
   *
   * The stable tree node includes information required for reverse
   * mapping from a KSM page to virtual addresses that map this page.
   *
   * In order to avoid large latencies of the rmap walks on KSM pages,
   * KSM maintains two types of nodes in the stable tree:
   *
   * * the regular nodes that keep the reverse mapping structures in a
   *   linked list
   * * the "chains" that link nodes ("dups") that represent the same
   *   write protected memory content, but each "dup" corresponds to a
   *   different KSM page copy of that content
   *
   * Internally, the regular nodes, "dups" and "chains" are represented
   * using the same :c:type:`struct stable_node` structure.
   *
   * In addition to the stable tree, KSM uses a second data structure called the
   * unstable tree: this tree holds pointers to pages which have been found to
   * be "unchanged for a period of time".  The unstable tree sorts these pages
   * by their contents, but since they are not write-protected, KSM cannot rely
   * upon the unstable tree to work correctly - the unstable tree is liable to
   * be corrupted as its contents are modified, and so it is called unstable.
   *
   * KSM solves this problem by several techniques:
   *
   * 1) The unstable tree is flushed every time KSM completes scanning all
   *    memory areas, and then the tree is rebuilt again from the beginning.
   * 2) KSM will insert into the unstable tree only pages whose hash value
   *    has not changed since the previous scan of all memory areas.
   * 3) The unstable tree is a red-black tree - so its balancing is based on the
   *    colors of the nodes and not on their contents, assuring that even when
   *    the tree gets "corrupted" it won't get out of balance, so scanning time
   *    remains the same (also, searching and inserting nodes in an rbtree uses
   *    the same algorithm, so we have no overhead when we flush and rebuild).
   * 4) KSM never flushes the stable tree, which means that even if it were to
   *    take 10 attempts to find a page in the unstable tree, once it is found,
   *    it is secured in the stable tree.  (When we scan a new page, we first
   *    compare it against the stable tree, and then against the unstable tree.)
   *
   * If the merge_across_nodes tunable is unset, then KSM maintains multiple
   * stable trees and multiple unstable trees: one of each for each NUMA node.
   */
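
  /*
   * Illustrative example (not part of the kernel source): the scanning
   * described above covers only memory that userspace has opted in with
   * madvise(MADV_MERGEABLE), and only while ksmd is enabled through
   * /sys/kernel/mm/ksm/run (see Documentation/admin-guide/mm/ksm.rst).
   * A minimal userspace sketch of that opt-in:
   */
  #if 0
  #include <sys/mman.h>
  #include <string.h>

  int main(void)
  {
  	size_t len = 16 * 4096;
  	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
  			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  	if (buf == MAP_FAILED)
  		return 1;
  	memset(buf, 0x5a, len);		/* many identical pages */

  	/* mark the area VM_MERGEABLE so ksmd will scan and merge it */
  	return madvise(buf, len, MADV_MERGEABLE);
  }
  #endif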
  
  /**
   * struct mm_slot - ksm information per mm that is being scanned
   * @link: link to the mm_slots hash list
   * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
   * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
   * @mm: the mm that this information is valid for
   */
  struct mm_slot {
  	struct hlist_node link;
  	struct list_head mm_list;
  	struct rmap_item *rmap_list;
  	struct mm_struct *mm;
  };
  
  /**
   * struct ksm_scan - cursor for scanning
   * @mm_slot: the current mm_slot we are scanning
   * @address: the next address inside that to be scanned
   * @rmap_list: link to the next rmap to be scanned in the rmap_list
   * @seqnr: count of completed full scans (needed when removing unstable node)
   *
   * There is only the one ksm_scan instance of this cursor structure.
   */
  struct ksm_scan {
  	struct mm_slot *mm_slot;
  	unsigned long address;
  	struct rmap_item **rmap_list;
  	unsigned long seqnr;
  };
  
  /**
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
   * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
   * @hlist_dup: linked into the stable_node->hlist with a stable_node chain
   * @list: linked into migrate_nodes, pending placement in the proper node tree
   * @hlist: hlist head of rmap_items using this ksm page
   * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
   * @chain_prune_time: time of the last full garbage collection
   * @rmap_hlist_len: number of rmap_item entries in hlist or STABLE_NODE_CHAIN
   * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
   */
  struct stable_node {
  	union {
  		struct rb_node node;	/* when node of stable tree */
  		struct {		/* when listed for migration */
  			struct list_head *head;
  			struct {
  				struct hlist_node hlist_dup;
  				struct list_head list;
  			};
  		};
  	};
  	struct hlist_head hlist;
  	union {
  		unsigned long kpfn;
  		unsigned long chain_prune_time;
  	};
  	/*
  	 * STABLE_NODE_CHAIN can be any negative number in
  	 * rmap_hlist_len negative range, but better not -1 to be able
  	 * to reliably detect underflows.
  	 */
  #define STABLE_NODE_CHAIN -1024
  	int rmap_hlist_len;
  #ifdef CONFIG_NUMA
  	int nid;
  #endif
  };
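
  /*
   * Illustrative sketch (not part of the kernel source): walking the
   * "dups" hanging off a stable_node chain, as described in the DOC
   * comment above.  remove_stable_node_chain() below does this walk
   * for real.
   */
  #if 0
  static void visit_chain_dups(struct stable_node *chain)
  {
  	struct stable_node *dup;
  	struct hlist_node *safe;

  	VM_BUG_ON(!is_stable_node_chain(chain));
  	hlist_for_each_entry_safe(dup, safe, &chain->hlist, hlist_dup) {
  		/* each dup is a distinct KSM page with identical content */
  		VM_BUG_ON(!is_stable_node_dup(dup));
  	}
  }
  #endif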
  
  /**
   * struct rmap_item - reverse mapping item for virtual addresses
   * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
   * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
   * @nid: NUMA node id of unstable tree in which linked (may not match page)
   * @mm: the memory structure this rmap_item is pointing into
   * @address: the virtual address this rmap_item tracks (+ flags in low bits)
   * @oldchecksum: previous checksum of the page at that virtual address
   * @node: rb node of this rmap_item in the unstable tree
   * @head: pointer to stable_node heading this list in the stable tree
   * @hlist: link into hlist of rmap_items hanging off that stable_node
   */
  struct rmap_item {
  	struct rmap_item *rmap_list;
  	union {
  		struct anon_vma *anon_vma;	/* when stable */
  #ifdef CONFIG_NUMA
  		int nid;		/* when node of unstable tree */
  #endif
  	};
  	struct mm_struct *mm;
  	unsigned long address;		/* + low bits used for flags below */
  	unsigned int oldchecksum;	/* when unstable */
  	union {
  		struct rb_node node;	/* when node of unstable tree */
  		struct {		/* when listed from stable tree */
  			struct stable_node *head;
  			struct hlist_node hlist;
  		};
  	};
  };
  
  #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
  #define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
  #define STABLE_FLAG	0x200	/* is listed from the stable tree */
  #define KSM_FLAG_MASK	(SEQNR_MASK|UNSTABLE_FLAG|STABLE_FLAG)
  				/* to mask all the flags */
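
  /*
   * Illustrative only (not part of the kernel source): how the low bits
   * of rmap_item->address double as the flags defined above, while the
   * high bits still identify the page-aligned virtual address.
   */
  #if 0
  static void rmap_item_decode(struct rmap_item *rmap_item)
  {
  	unsigned long addr = rmap_item->address & PAGE_MASK;
  	unsigned char age  = rmap_item->address & SEQNR_MASK;
  	bool unstable	   = rmap_item->address & UNSTABLE_FLAG;
  	bool stable	   = rmap_item->address & STABLE_FLAG;

  	(void)addr; (void)age; (void)unstable; (void)stable;
  }
  #endif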
  
  /* The stable and unstable tree heads */
  static struct rb_root one_stable_tree[1] = { RB_ROOT };
  static struct rb_root one_unstable_tree[1] = { RB_ROOT };
  static struct rb_root *root_stable_tree = one_stable_tree;
  static struct rb_root *root_unstable_tree = one_unstable_tree;

  /* Recently migrated nodes of stable tree, pending proper placement */
  static LIST_HEAD(migrate_nodes);
  #define STABLE_NODE_DUP_HEAD ((struct list_head *)&migrate_nodes.prev)

  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
  
  static struct mm_slot ksm_mm_head = {
  	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
  };
  static struct ksm_scan ksm_scan = {
  	.mm_slot = &ksm_mm_head,
  };
  
  static struct kmem_cache *rmap_item_cache;
  static struct kmem_cache *stable_node_cache;
  static struct kmem_cache *mm_slot_cache;
  
  /* The number of nodes in the stable tree */
  static unsigned long ksm_pages_shared;

  /* The number of page slots additionally sharing those nodes */
  static unsigned long ksm_pages_sharing;

  /* The number of nodes in the unstable tree */
  static unsigned long ksm_pages_unshared;
  
  /* The number of rmap_items in use: to calculate pages_volatile */
  static unsigned long ksm_rmap_items;

  /* The number of stable_node chains */
  static unsigned long ksm_stable_node_chains;
  
  /* The number of stable_node dups linked to the stable_node chains */
  static unsigned long ksm_stable_node_dups;
  
  /* Delay in pruning stale stable_node_dups in the stable_node_chains */
  static int ksm_stable_node_chains_prune_millisecs = 2000;
  
  /* Maximum number of page slots sharing a stable node */
  static int ksm_max_page_sharing = 256;

  /* Number of pages ksmd should scan in one batch */
  static unsigned int ksm_thread_pages_to_scan = 100;
  
  /* Milliseconds ksmd should sleep between batches */
  static unsigned int ksm_thread_sleep_millisecs = 20;

  /* Checksum of an empty (zeroed) page */
  static unsigned int zero_checksum __read_mostly;
  
  /* Whether to merge empty (zeroed) pages with actual zero pages */
  static bool ksm_use_zero_pages __read_mostly;

  #ifdef CONFIG_NUMA
  /* Zeroed when merging across nodes is not allowed */
  static unsigned int ksm_merge_across_nodes = 1;
  static int ksm_nr_node_ids = 1;
  #else
  #define ksm_merge_across_nodes	1U
  #define ksm_nr_node_ids		1
  #endif

  #define KSM_RUN_STOP	0
  #define KSM_RUN_MERGE	1
  #define KSM_RUN_UNMERGE	2
  #define KSM_RUN_OFFLINE	4
  static unsigned long ksm_run = KSM_RUN_STOP;
  static void wait_while_offlining(void);
  
  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
  static DEFINE_MUTEX(ksm_thread_mutex);
  static DEFINE_SPINLOCK(ksm_mmlist_lock);
  
  #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
  		sizeof(struct __struct), __alignof__(struct __struct),\
  		(__flags), NULL)
  
  static int __init ksm_slab_init(void)
  {
  	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
  	if (!rmap_item_cache)
  		goto out;

  	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
  	if (!stable_node_cache)
  		goto out_free1;

  	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
  	if (!mm_slot_cache)
  		goto out_free2;
  
  	return 0;

  out_free2:
  	kmem_cache_destroy(stable_node_cache);
  out_free1:
  	kmem_cache_destroy(rmap_item_cache);
  out:
  	return -ENOMEM;
  }
  
  static void __init ksm_slab_free(void)
  {
  	kmem_cache_destroy(mm_slot_cache);
  	kmem_cache_destroy(stable_node_cache);
  	kmem_cache_destroy(rmap_item_cache);
  	mm_slot_cache = NULL;
  }

  static __always_inline bool is_stable_node_chain(struct stable_node *chain)
  {
  	return chain->rmap_hlist_len == STABLE_NODE_CHAIN;
  }
  
  static __always_inline bool is_stable_node_dup(struct stable_node *dup)
  {
  	return dup->head == STABLE_NODE_DUP_HEAD;
  }
  
  static inline void stable_node_chain_add_dup(struct stable_node *dup,
  					     struct stable_node *chain)
  {
  	VM_BUG_ON(is_stable_node_dup(dup));
  	dup->head = STABLE_NODE_DUP_HEAD;
  	VM_BUG_ON(!is_stable_node_chain(chain));
  	hlist_add_head(&dup->hlist_dup, &chain->hlist);
  	ksm_stable_node_dups++;
  }
  
  static inline void __stable_node_dup_del(struct stable_node *dup)
  {
  	VM_BUG_ON(!is_stable_node_dup(dup));
  	hlist_del(&dup->hlist_dup);
  	ksm_stable_node_dups--;
  }
  
  static inline void stable_node_dup_del(struct stable_node *dup)
  {
  	VM_BUG_ON(is_stable_node_chain(dup));
  	if (is_stable_node_dup(dup))
  		__stable_node_dup_del(dup);
  	else
  		rb_erase(&dup->node, root_stable_tree + NUMA(dup->nid));
  #ifdef CONFIG_DEBUG_VM
  	dup->head = NULL;
  #endif
  }

  static inline struct rmap_item *alloc_rmap_item(void)
  {
  	struct rmap_item *rmap_item;

  	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
  						__GFP_NORETRY | __GFP_NOWARN);
  	if (rmap_item)
  		ksm_rmap_items++;
  	return rmap_item;
  }
  
  static inline void free_rmap_item(struct rmap_item *rmap_item)
  {
  	ksm_rmap_items--;
  	rmap_item->mm = NULL;	/* debug safety */
  	kmem_cache_free(rmap_item_cache, rmap_item);
  }

  static inline struct stable_node *alloc_stable_node(void)
  {
  	/*
  	 * The allocation can take too long with GFP_KERNEL when memory is under
  	 * pressure, which may lead to hung task warnings.  Adding __GFP_HIGH
  	 * grants access to memory reserves, helping to avoid this problem.
  	 */
  	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH);
  }
  
  static inline void free_stable_node(struct stable_node *stable_node)
  {
  	VM_BUG_ON(stable_node->rmap_hlist_len &&
  		  !is_stable_node_chain(stable_node));
  	kmem_cache_free(stable_node_cache, stable_node);
  }

  static inline struct mm_slot *alloc_mm_slot(void)
  {
  	if (!mm_slot_cache)	/* initialization failed */
  		return NULL;
  	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
  }
  
  static inline void free_mm_slot(struct mm_slot *mm_slot)
  {
  	kmem_cache_free(mm_slot_cache, mm_slot);
  }

  static struct mm_slot *get_mm_slot(struct mm_struct *mm)
  {
  	struct mm_slot *slot;

  	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
  		if (slot->mm == mm)
  			return slot;

  	return NULL;
  }
  
  static void insert_to_mm_slots_hash(struct mm_struct *mm,
  				    struct mm_slot *mm_slot)
  {
  	mm_slot->mm = mm;
  	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
  }

  /*
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
   * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
   * a special flag: they can just back out as soon as mm_users goes to zero.
   * ksm_test_exit() is used throughout to make this test for exit: in some
   * places for correctness, in some places just to avoid unnecessary work.
   */
  static inline bool ksm_test_exit(struct mm_struct *mm)
  {
  	return atomic_read(&mm->mm_users) == 0;
  }
  
  /*
   * We use break_ksm to break COW on a ksm page: it's a stripped down
   *
   *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
   *		put_page(page);
   *
   * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
   * in case the application has unmapped and remapped mm,addr meanwhile.
   * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
   * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
   *
   * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
   * of the process that owns 'vma'.  We also do not want to enforce
   * protection keys here anyway.
   */
  static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
  {
  	struct page *page;
  	vm_fault_t ret = 0;
  
  	do {
  		cond_resched();
  		page = follow_page(vma, addr,
  				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
  		if (IS_ERR_OR_NULL(page))
  			break;
  		if (PageKsm(page))
  			ret = handle_mm_fault(vma, addr,
  					FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
  		else
  			ret = VM_FAULT_WRITE;
  		put_page(page);
  	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
  	/*
  	 * We must loop because handle_mm_fault() may back out if there's
  	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
  	 *
  	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
  	 * COW has been broken, even if the vma does not permit VM_WRITE;
  	 * but note that a concurrent fault might break PageKsm for us.
  	 *
  	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
  	 * backing file, which also invalidates anonymous pages: that's
  	 * okay, that truncation will have unmapped the PageKsm for us.
  	 *
  	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
  	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
  	 * current task has TIF_MEMDIE set, and will be OOM killed on return
  	 * to user; and ksmd, having no mm, would never be chosen for that.
  	 *
  	 * But if the mm is in a limited mem_cgroup, then the fault may fail
  	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
  	 * even ksmd can fail in this way - though it's usually breaking ksm
  	 * just to undo a merge it made a moment before, so unlikely to oom.
  	 *
  	 * That's a pity: we might therefore have more kernel pages allocated
  	 * than we're counting as nodes in the stable tree; but ksm_do_scan
  	 * will retry to break_cow on each pass, so should recover the page
  	 * in due course.  The important thing is to not let VM_MERGEABLE
  	 * be cleared while any such pages might remain in the area.
  	 */
  	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
  }

  static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
  		unsigned long addr)
  {
  	struct vm_area_struct *vma;
  	if (ksm_test_exit(mm))
  		return NULL;
  	vma = find_vma(mm, addr);
  	if (!vma || vma->vm_start > addr)
  		return NULL;
  	if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  		return NULL;
  	return vma;
  }

  static void break_cow(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;

  	/*
  	 * It is not an accident that whenever we want to break COW
  	 * to undo, we also need to drop a reference to the anon_vma.
  	 */
  	put_anon_vma(rmap_item->anon_vma);

  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, addr);
  	if (vma)
  		break_ksm(vma, addr);
  	up_read(&mm->mmap_sem);
  }
  
  static struct page *get_mergeable_page(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	struct page *page;
  
  	down_read(&mm->mmap_sem);
  	vma = find_mergeable_vma(mm, addr);
  	if (!vma)
  		goto out;
  
  	page = follow_page(vma, addr, FOLL_GET);
  	if (IS_ERR_OR_NULL(page))
  		goto out;
  	if (PageAnon(page)) {
  		flush_anon_page(vma, page, addr);
  		flush_dcache_page(page);
  	} else {
  		put_page(page);
  out:
  		page = NULL;
  	}
  	up_read(&mm->mmap_sem);
  	return page;
  }

  /*
   * This helper is used for getting right index into array of tree roots.
   * When merge_across_nodes knob is set to 1, there are only two rb-trees for
   * stable and unstable pages from all nodes with roots in index 0. Otherwise,
   * every node has its own stable and unstable tree.
   */
  static inline int get_kpfn_nid(unsigned long kpfn)
  {
  	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
  }
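
  /*
   * Illustrative only (not part of the kernel source): callers later in
   * this file combine get_kpfn_nid() with the root arrays above to pick
   * the tree for a given stable node; with merge_across_nodes == 1 this
   * is always &root_stable_tree[0].
   */
  #if 0
  static struct rb_root *stable_root_of(struct stable_node *stable_node)
  {
  	return root_stable_tree + get_kpfn_nid(stable_node->kpfn);
  }
  #endif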

  static struct stable_node *alloc_stable_node_chain(struct stable_node *dup,
  						   struct rb_root *root)
  {
  	struct stable_node *chain = alloc_stable_node();
  	VM_BUG_ON(is_stable_node_chain(dup));
  	if (likely(chain)) {
  		INIT_HLIST_HEAD(&chain->hlist);
  		chain->chain_prune_time = jiffies;
  		chain->rmap_hlist_len = STABLE_NODE_CHAIN;
  #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA)
  		chain->nid = -1; /* debug */
  #endif
  		ksm_stable_node_chains++;
  
  		/*
  		 * Put the stable node chain in the first dimension of
  		 * the stable tree and at the same time remove the old
  		 * stable node.
  		 */
  		rb_replace_node(&dup->node, &chain->node, root);
  
  		/*
  		 * Move the old stable node to the second dimension
  		 * queued in the hlist_dup. The invariant is that all
  		 * dup stable_nodes in the chain->hlist point to pages
  		 * that are wrprotected and have the exact same
  		 * content.
  		 */
  		stable_node_chain_add_dup(dup, chain);
  	}
  	return chain;
  }
  
  static inline void free_stable_node_chain(struct stable_node *chain,
  					  struct rb_root *root)
  {
  	rb_erase(&chain->node, root);
  	free_stable_node(chain);
  	ksm_stable_node_chains--;
  }

  static void remove_node_from_stable_tree(struct stable_node *stable_node)
  {
  	struct rmap_item *rmap_item;

  	/* check it's not STABLE_NODE_CHAIN or negative */
  	BUG_ON(stable_node->rmap_hlist_len < 0);
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
  		if (rmap_item->hlist.next)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
  		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
  		stable_node->rmap_hlist_len--;
  		put_anon_vma(rmap_item->anon_vma);
  		rmap_item->address &= PAGE_MASK;
  		cond_resched();
  	}

  	/*
  	 * We need the second aligned pointer of the migrate_nodes
  	 * list_head to stay clear from the rb_parent_color union
  	 * (aligned and different than any node) and also different
  	 * from &migrate_nodes. This will verify that future list.h changes
  	 * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it.
  	 */
  #if defined(GCC_VERSION) && GCC_VERSION >= 40903
  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
  #endif
  	if (stable_node->head == &migrate_nodes)
  		list_del(&stable_node->list);
  	else
  		stable_node_dup_del(stable_node);
  	free_stable_node(stable_node);
  }
  
  /*
   * get_ksm_page: checks if the page indicated by the stable node
   * is still its ksm page, despite having held no reference to it.
   * In which case we can trust the content of the page, and it
   * returns the gotten page; but if the page has now been zapped,
   * remove the stale node from the stable tree and return NULL.
   * But beware, the stable node's page might be being migrated.
   *
   * You would expect the stable_node to hold a reference to the ksm page.
   * But if it increments the page's count, swapping out has to wait for
   * ksmd to come around again before it can free the page, which may take
   * seconds or even minutes: much too unresponsive.  So instead we use a
   * "keyhole reference": access to the ksm page from the stable node peeps
   * out through its keyhole to see if that page still holds the right key,
   * pointing back to this stable node.  This relies on freeing a PageAnon
   * page to reset its page->mapping to NULL, and relies on no other use of
   * a page to put something that might look like our key in page->mapping.
   * is on its way to being freed; but it is an anomaly to bear in mind.
   */
  static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
  {
  	struct page *page;
  	void *expected_mapping;
  	unsigned long kpfn;

  	expected_mapping = (void *)((unsigned long)stable_node |
  					PAGE_MAPPING_KSM);
  again:
  	kpfn = READ_ONCE(stable_node->kpfn); /* Address dependency. */
  	page = pfn_to_page(kpfn);
  	if (READ_ONCE(page->mapping) != expected_mapping)
  		goto stale;
  
  	/*
  	 * We cannot do anything with the page while its refcount is 0.
  	 * Usually 0 means free, or tail of a higher-order page: in which
  	 * case this node is no longer referenced, and should be freed;
  	 * however, it might mean that the page is under page_ref_freeze().
  	 * The __remove_mapping() case is easy, again the node is now stale;
  	 * but if page is swapcache in migrate_page_move_mapping(), it might
  	 * still be our page, in which case it's essential to keep the node.
  	 */
  	while (!get_page_unless_zero(page)) {
  		/*
  		 * Another check for page->mapping != expected_mapping would
  		 * work here too.  We have chosen the !PageSwapCache test to
  		 * optimize the common case, when the page is or is about to
  		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
  		 * in the ref_freeze section of __remove_mapping(); but Anon
  		 * page->mapping reset to NULL later, in free_pages_prepare().
  		 */
  		if (!PageSwapCache(page))
  			goto stale;
  		cpu_relax();
  	}
  	if (READ_ONCE(page->mapping) != expected_mapping) {
  		put_page(page);
  		goto stale;
  	}

  	if (lock_it) {
  		lock_page(page);
  		if (READ_ONCE(page->mapping) != expected_mapping) {
  			unlock_page(page);
  			put_page(page);
  			goto stale;
  		}
  	}
  	return page;

  stale:
  	/*
  	 * We come here from above when page->mapping or !PageSwapCache
  	 * suggests that the node is stale; but it might be under migration.
  	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
  	 * before checking whether node->kpfn has been changed.
  	 */
  	smp_rmb();
  	if (READ_ONCE(stable_node->kpfn) != kpfn)
  		goto again;
  	remove_node_from_stable_tree(stable_node);
  	return NULL;
  }

  /*
   * Removing rmap_item from stable or unstable tree.
   * This function will clean the information from the stable/unstable tree.
   */
  static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
  {
  	if (rmap_item->address & STABLE_FLAG) {
  		struct stable_node *stable_node;
  		struct page *page;

  		stable_node = rmap_item->head;
  		page = get_ksm_page(stable_node, true);
  		if (!page)
  			goto out;

  		hlist_del(&rmap_item->hlist);
  		unlock_page(page);
  		put_page(page);

  		if (!hlist_empty(&stable_node->hlist))
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
  		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
  		stable_node->rmap_hlist_len--;

  		put_anon_vma(rmap_item->anon_vma);
  		rmap_item->address &= PAGE_MASK;

  	} else if (rmap_item->address & UNSTABLE_FLAG) {
  		unsigned char age;
  		/*
  		 * Usually ksmd can and must skip the rb_erase, because
  		 * root_unstable_tree was already reset to RB_ROOT.
  		 * But be careful when an mm is exiting: do the rb_erase
  		 * if this rmap_item was inserted by this scan, rather
  		 * than left over from before.
  		 */
  		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
  		BUG_ON(age > 1);
  		if (!age)
  			rb_erase(&rmap_item->node,
  				 root_unstable_tree + NUMA(rmap_item->nid));
  		ksm_pages_unshared--;
  		rmap_item->address &= PAGE_MASK;
  	}
  out:
  	cond_resched();		/* we're called from many long loops */
  }

  static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
  				       struct rmap_item **rmap_list)
  {
  	while (*rmap_list) {
  		struct rmap_item *rmap_item = *rmap_list;
  		*rmap_list = rmap_item->rmap_list;
  		remove_rmap_item_from_tree(rmap_item);
  		free_rmap_item(rmap_item);
  	}
  }
  
  /*
   * Though it's very tempting to unmerge rmap_items from stable tree rather
   * than check every pte of a given vma, the locking doesn't quite work for
   * that - an rmap_item is assigned to the stable tree after inserting ksm
   * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
   * rmap_items from parent to child at fork time (so as not to waste time
   * if exit comes before the next scan reaches it).
   *
   * Similarly, although we'd like to remove rmap_items (so updating counts
   * and freeing memory) when unmerging an area, it's easier to leave that
   * to the next pass of ksmd - consider, for example, how ksmd might be
   * in cmp_and_merge_page on one of the rmap_items we would be removing.
   */
  static int unmerge_ksm_pages(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
  {
  	unsigned long addr;
  	int err = 0;

  	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
  		if (ksm_test_exit(vma->vm_mm))
  			break;
  		if (signal_pending(current))
  			err = -ERESTARTSYS;
  		else
  			err = break_ksm(vma, addr);
  	}
  	return err;
  }

  static inline struct stable_node *page_stable_node(struct page *page)
  {
  	return PageKsm(page) ? page_rmapping(page) : NULL;
  }
  
  static inline void set_page_stable_node(struct page *page,
  					struct stable_node *stable_node)
  {
  	page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
  }

  #ifdef CONFIG_SYSFS
  /*
   * Only called through the sysfs control interface:
   */
  static int remove_stable_node(struct stable_node *stable_node)
  {
  	struct page *page;
  	int err;
  
  	page = get_ksm_page(stable_node, true);
  	if (!page) {
  		/*
  		 * get_ksm_page did remove_node_from_stable_tree itself.
  		 */
  		return 0;
  	}

  	if (WARN_ON_ONCE(page_mapped(page))) {
  		/*
  		 * This should not happen: but if it does, just refuse to let
  		 * merge_across_nodes be switched - there is no need to panic.
  		 */
  		err = -EBUSY;
  	} else {
  		/*
  		 * The stable node did not yet appear stale to get_ksm_page(),
  		 * since that allows for an unmapped ksm page to be recognized
  		 * right up until it is freed; but the node is safe to remove.
  		 * This page might be in a pagevec waiting to be freed,
  		 * or it might be PageSwapCache (perhaps under writeback),
  		 * or it might have been removed from swapcache a moment ago.
  		 */
  		set_page_stable_node(page, NULL);
  		remove_node_from_stable_tree(stable_node);
  		err = 0;
  	}
  
  	unlock_page(page);
  	put_page(page);
  	return err;
  }

  static int remove_stable_node_chain(struct stable_node *stable_node,
  				    struct rb_root *root)
  {
  	struct stable_node *dup;
  	struct hlist_node *hlist_safe;
  
  	if (!is_stable_node_chain(stable_node)) {
  		VM_BUG_ON(is_stable_node_dup(stable_node));
  		if (remove_stable_node(stable_node))
  			return true;
  		else
  			return false;
  	}
  
  	hlist_for_each_entry_safe(dup, hlist_safe,
  				  &stable_node->hlist, hlist_dup) {
  		VM_BUG_ON(!is_stable_node_dup(dup));
  		if (remove_stable_node(dup))
  			return true;
  	}
  	BUG_ON(!hlist_empty(&stable_node->hlist));
  	free_stable_node_chain(stable_node, root);
  	return false;
  }

  static int remove_all_stable_nodes(void)
  {
  	struct stable_node *stable_node, *next;
  	int nid;
  	int err = 0;

  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		while (root_stable_tree[nid].rb_node) {
  			stable_node = rb_entry(root_stable_tree[nid].rb_node,
  						struct stable_node, node);
  			if (remove_stable_node_chain(stable_node,
  						     root_stable_tree + nid)) {
  				err = -EBUSY;
  				break;	/* proceed to next nid */
  			}
  			cond_resched();
  		}
  	}
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
  		if (remove_stable_node(stable_node))
  			err = -EBUSY;
  		cond_resched();
  	}
  	return err;
  }

  static int unmerge_and_remove_all_rmap_items(void)
  {
  	struct mm_slot *mm_slot;
  	struct mm_struct *mm;
  	struct vm_area_struct *vma;
  	int err = 0;
  
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
  						struct mm_slot, mm_list);
  	spin_unlock(&ksm_mmlist_lock);

  	for (mm_slot = ksm_scan.mm_slot;
  			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
  		mm = mm_slot->mm;
  		down_read(&mm->mmap_sem);
  		for (vma = mm->mmap; vma; vma = vma->vm_next) {
  			if (ksm_test_exit(mm))
  				break;
  			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  				continue;
  			err = unmerge_ksm_pages(vma,
  						vma->vm_start, vma->vm_end);
  			if (err)
  				goto error;
  		}

  		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
  		up_read(&mm->mmap_sem);
  
  		spin_lock(&ksm_mmlist_lock);
  		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
  						struct mm_slot, mm_list);
  		if (ksm_test_exit(mm)) {
  			hash_del(&mm_slot->link);
  			list_del(&mm_slot->mm_list);
  			spin_unlock(&ksm_mmlist_lock);
  
  			free_mm_slot(mm_slot);
  			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  			mmdrop(mm);
  		} else
  			spin_unlock(&ksm_mmlist_lock);
  	}

  	/* Clean up stable nodes, but don't worry if some are still busy */
  	remove_all_stable_nodes();
  	ksm_scan.seqnr = 0;
  	return 0;
  
  error:
  	up_read(&mm->mmap_sem);
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = &ksm_mm_head;
  	spin_unlock(&ksm_mmlist_lock);
  	return err;
  }
  #endif /* CONFIG_SYSFS */

  static u32 calc_checksum(struct page *page)
  {
  	u32 checksum;
  	void *addr = kmap_atomic(page);
  	checksum = jhash2(addr, PAGE_SIZE / 4, 17);
  	kunmap_atomic(addr);
  	return checksum;
  }
  
  static int memcmp_pages(struct page *page1, struct page *page2)
  {
  	char *addr1, *addr2;
  	int ret;
  	addr1 = kmap_atomic(page1);
  	addr2 = kmap_atomic(page2);
  	ret = memcmp(addr1, addr2, PAGE_SIZE);
  	kunmap_atomic(addr2);
  	kunmap_atomic(addr1);
  	return ret;
  }
  
  static inline int pages_identical(struct page *page1, struct page *page2)
  {
  	return !memcmp_pages(page1, page2);
  }
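
  /*
   * Illustrative only (not part of the kernel source): rule 2) of the
   * DOC overview - a page becomes a candidate for the unstable tree only
   * once its checksum has stopped changing.  cmp_and_merge_page() later
   * in this file applies this policy using calc_checksum() and
   * rmap_item->oldchecksum, roughly like so:
   */
  #if 0
  static bool page_hash_settled(struct rmap_item *rmap_item, struct page *page)
  {
  	u32 checksum = calc_checksum(page);

  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return false;
  	}
  	return true;
  }
  #endif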
  
  static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  			      pte_t *orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  	};
  	int swapped;
  	int err = -EFAULT;
  	unsigned long mmun_start;	/* For mmu_notifiers */
  	unsigned long mmun_end;		/* For mmu_notifiers */

  	pvmw.address = page_address_in_vma(page, vma);
  	if (pvmw.address == -EFAULT)
  		goto out;
  	BUG_ON(PageTransCompound(page));

  	mmun_start = pvmw.address;
  	mmun_end   = pvmw.address + PAGE_SIZE;
  	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
  	if (!page_vma_mapped_walk(&pvmw))
  		goto out_mn;
  	if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
  		goto out_unlock;

  	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
  	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
  						mm_tlb_flush_pending(mm)) {
  		pte_t entry;
  
  		swapped = PageSwapCache(page);
  		flush_cache_page(vma, pvmw.address, page_to_pfn(page));
  		/*
  		 * Ok this is tricky: when get_user_pages_fast() runs it doesn't
  		 * take any lock, therefore the check that we are going to make
  		 * with the pagecount against the mapcount is racy and
  		 * O_DIRECT can happen right after the check.
  		 * So we clear the pte and flush the tlb before the check;
  		 * this assures us that no O_DIRECT can happen after the check
  		 * or in the middle of the check.
  		 *
  		 * No need to notify as we are downgrading page table to read
  		 * only not changing it to point to a new page.
  		 *
ad56b738c   Mike Rapoport   docs/vm: rename d...
1044
  		 * See Documentation/vm/mmu_notifier.rst
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1045
  		 */
0f10851ea   Jérôme Glisse   mm/mmu_notifier: ...
1046
  		entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1047
1048
1049
1050
  		/*
  		 * Check that no O_DIRECT or similar I/O is in progress on the
  		 * page
  		 */
31e855ea7   Hugh Dickins   ksm: remove redun...
1051
  		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
36eaff336   Kirill A. Shutemov   mm, ksm: convert ...
1052
  			set_pte_at(mm, pvmw.address, pvmw.pte, entry);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1053
1054
  			goto out_unlock;
  		}
4e31635c3   Hugh Dickins   ksm: fix bad user...
1055
1056
  		if (pte_dirty(entry))
  			set_page_dirty(page);
595cd8f25   Aneesh Kumar K.V   mm/ksm: handle pr...
1057
1058
1059
1060
1061
  
  		if (pte_protnone(entry))
  			entry = pte_mkclean(pte_clear_savedwrite(entry));
  		else
  			entry = pte_mkclean(pte_wrprotect(entry));
36eaff336   Kirill A. Shutemov   mm, ksm: convert ...
1062
  		set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1063
  	}
36eaff336   Kirill A. Shutemov   mm, ksm: convert ...
1064
  	*orig_pte = *pvmw.pte;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1065
1066
1067
  	err = 0;
  
  out_unlock:
36eaff336   Kirill A. Shutemov   mm, ksm: convert ...
1068
  	page_vma_mapped_walk_done(&pvmw);
6bdb913f0   Haggai Eran   mm: wrap calls to...
1069
1070
  out_mn:
  	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1071
1072
1073
1074
1075
1076
  out:
  	return err;
  }
  
  /**
   * replace_page - replace page in vma by new ksm page
 * @vma:      vma that holds the pte pointing to page
 * @page:     the page we are replacing by kpage
 * @kpage:    the ksm page we replace page by
 * @orig_pte: the original value of the pte
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int replace_page(struct vm_area_struct *vma, struct page *page,
			struct page *kpage, pte_t orig_pte)
{
	struct mm_struct *mm = vma->vm_mm;
	pmd_t *pmd;
	pte_t *ptep;
	pte_t newpte;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	pmd = mm_find_pmd(mm, addr);
	if (!pmd)
		goto out;

	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out_mn;
	}

	/*
	 * No need to check ksm_use_zero_pages here: we can only have a
	 * zero_page here if ksm_use_zero_pages was enabled already.
	 */
	if (!is_zero_pfn(page_to_pfn(kpage))) {
		get_page(kpage);
		page_add_anon_rmap(kpage, vma, addr, false);
		newpte = mk_pte(kpage, vma->vm_page_prot);
	} else {
		newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
					       vma->vm_page_prot));
		/*
		 * We're replacing an anonymous page with a zero page, which is
		 * not anonymous. We need to do proper accounting otherwise we
		 * will get wrong values in /proc, and a BUG message in dmesg
		 * when tearing down the mm.
		 */
		dec_mm_counter(mm, MM_ANONPAGES);
	}

	flush_cache_page(vma, addr, pte_pfn(*ptep));
	/*
	 * No need to notify as we are replacing a read only page with another
	 * read only page with the same content.
	 *
	 * See Documentation/vm/mmu_notifier.rst
	 */
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, newpte);

	page_remove_rmap(page, false);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}
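
/*
 * Added summary (illustrative): a single-pte merge is a two step
 * dance -
 *
 *	write_protect_page(vma, page, &orig_pte);    // pte made read-only
 *	if (pages_identical(page, kpage))            // contents re-checked
 *		replace_page(vma, page, kpage, orig_pte);
 *
 * replace_page() re-verifies pte_same(*ptep, orig_pte) under the page
 * table lock, so a write fault racing in between changes the pte and
 * makes the replacement back out with -EFAULT.
 */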
  
  /*
   * try_to_merge_one_page - take two pages and merge them into one
 * @vma: the vma that holds the pte pointing to page
 * @page: the PageAnon page that we want to replace with kpage
 * @kpage: the PageKsm page that we want to map instead of page,
 *         or NULL the first time when we want to use page as kpage.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */
static int try_to_merge_one_page(struct vm_area_struct *vma,
				 struct page *page, struct page *kpage)
{
	pte_t orig_pte = __pte(0);
	int err = -EFAULT;

	if (page == kpage)			/* ksm page forked */
		return 0;
	if (!PageAnon(page))
		goto out;

	/*
	 * We need the page lock to read a stable PageSwapCache in
	 * write_protect_page().  We use trylock_page() instead of
	 * lock_page() because we don't want to wait here - we
	 * prefer to continue scanning and merging different pages,
	 * then come back to this page when it is unlocked.
	 */
	if (!trylock_page(page))
		goto out;

	if (PageTransCompound(page)) {
		if (split_huge_page(page))
			goto out_unlock;
	}

	/*
	 * If this anonymous page is mapped only here, its pte may need
	 * to be write-protected.  If it's mapped elsewhere, all of its
	 * ptes are necessarily already write-protected.  But in either
	 * case, we need to lock and check page_count is not raised.
	 */
	if (write_protect_page(vma, page, &orig_pte) == 0) {
		if (!kpage) {
			/*
			 * While we hold page lock, upgrade page from
			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
			 * stable_tree_insert() will update stable_node.
			 */
			set_page_stable_node(page, NULL);
			mark_page_accessed(page);
			/*
			 * Page reclaim just frees a clean page with no dirty
			 * ptes: make sure that the ksm page would be swapped.
			 */
			if (!PageDirty(page))
				SetPageDirty(page);
			err = 0;
		} else if (pages_identical(page, kpage))
			err = replace_page(vma, page, kpage, orig_pte);
	}

	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
		munlock_vma_page(page);
		if (!PageMlocked(kpage)) {
			unlock_page(page);
			lock_page(kpage);
			mlock_vma_page(kpage);
			page = kpage;		/* for final unlock */
		}
	}

out_unlock:
	unlock_page(page);
out:
	return err;
}
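
/*
 * Added note (illustrative): kpage == NULL is the bootstrap case -
 * the scanned page itself is promoted from PageAnon to PageKsm and
 * becomes the page everything else merges into; all later callers
 * pass an existing PageKsm page.
 */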
  
  /*
 * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
 * but no new kernel page is allocated: kpage must already be a ksm page.
 *
 * This function returns 0 if the pages were merged, -EFAULT otherwise.
 */
static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
				      struct page *page, struct page *kpage)
{
	struct mm_struct *mm = rmap_item->mm;
	struct vm_area_struct *vma;
	int err = -EFAULT;

	down_read(&mm->mmap_sem);
	vma = find_mergeable_vma(mm, rmap_item->address);
	if (!vma)
		goto out;

	err = try_to_merge_one_page(vma, page, kpage);
	if (err)
		goto out;

	/* Unstable nid is in union with stable anon_vma: remove first */
	remove_rmap_item_from_tree(rmap_item);

	/* Must get reference to anon_vma while still holding mmap_sem */
	rmap_item->anon_vma = vma->anon_vma;
	get_anon_vma(vma->anon_vma);
out:
	up_read(&mm->mmap_sem);
	return err;
}
  
  /*
 * try_to_merge_two_pages - take two identical pages and prepare them
 * to be merged into one page.
 *
 * This function returns the kpage if we successfully merged two identical
 * pages into one ksm page, NULL otherwise.
 *
 * Note that this function upgrades page to ksm page: if one of the pages
 * is already a ksm page, try_to_merge_with_ksm_page should be used.
 */
static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
					   struct page *page,
					   struct rmap_item *tree_rmap_item,
					   struct page *tree_page)
{
	int err;

	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
	if (!err) {
		err = try_to_merge_with_ksm_page(tree_rmap_item,
							tree_page, page);
		/*
		 * If that fails, we have a ksm page with only one pte
		 * pointing to it: so break it.
		 */
		if (err)
			break_cow(rmap_item);
	}
	return err ? NULL : page;
}
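
/*
 * Added note (illustrative): the first try_to_merge_with_ksm_page()
 * call passes kpage == NULL, turning "page" itself into the new KSM
 * page; the second call then merges tree_page into it.  The caller
 * only gets a non-NULL kpage back when both halves succeeded.
 */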

static __always_inline
bool __is_page_sharing_candidate(struct stable_node *stable_node, int offset)
{
	VM_BUG_ON(stable_node->rmap_hlist_len < 0);
	/*
	 * Check that at least one mapping still exists, otherwise
	 * there's not much point to merge and share with this
	 * stable_node, as the underlying tree_page of the other
	 * sharer is going to be freed soon.
	 */
	return stable_node->rmap_hlist_len &&
		stable_node->rmap_hlist_len + offset < ksm_max_page_sharing;
}

static __always_inline
bool is_page_sharing_candidate(struct stable_node *stable_node)
{
	return __is_page_sharing_candidate(stable_node, 0);
}
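
/*
 * Added example (illustrative, assuming the default ksm_max_page_sharing
 * of 256): a dup with rmap_hlist_len == 255 still passes the offset 0
 * check, but fails __is_page_sharing_candidate(found, 1) below, which
 * asks whether the dup could take one more merge beyond the one
 * already underway.
 */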

static struct page *stable_node_dup(struct stable_node **_stable_node_dup,
				    struct stable_node **_stable_node,
				    struct rb_root *root,
				    bool prune_stale_stable_nodes)
{
	struct stable_node *dup, *found = NULL, *stable_node = *_stable_node;
	struct hlist_node *hlist_safe;
	struct page *_tree_page, *tree_page = NULL;
	int nr = 0;
	int found_rmap_hlist_len;

	if (!prune_stale_stable_nodes ||
	    time_before(jiffies, stable_node->chain_prune_time +
			msecs_to_jiffies(
				ksm_stable_node_chains_prune_millisecs)))
		prune_stale_stable_nodes = false;
	else
		stable_node->chain_prune_time = jiffies;

	hlist_for_each_entry_safe(dup, hlist_safe,
				  &stable_node->hlist, hlist_dup) {
		cond_resched();
		/*
		 * We must walk all stable_node_dup to prune the stale
		 * stable nodes during lookup.
		 *
		 * get_ksm_page can drop the nodes from the
		 * stable_node->hlist if they point to freed pages
		 * (that's why we do a _safe walk). The "dup"
		 * stable_node parameter itself will be freed from
		 * under us if it returns NULL.
		 */
		_tree_page = get_ksm_page(dup, false);
		if (!_tree_page)
			continue;
		nr += 1;
		if (is_page_sharing_candidate(dup)) {
			if (!found ||
			    dup->rmap_hlist_len > found_rmap_hlist_len) {
				if (found)
					put_page(tree_page);
				found = dup;
				found_rmap_hlist_len = found->rmap_hlist_len;
				tree_page = _tree_page;

				/* skip put_page for found dup */
				if (!prune_stale_stable_nodes)
					break;
				continue;
			}
		}
		put_page(_tree_page);
	}

	if (found) {
		/*
		 * nr is counting all dups in the chain only if
		 * prune_stale_stable_nodes is true, otherwise we may
		 * break the loop at nr == 1 even if there are
		 * multiple entries.
		 */
		if (prune_stale_stable_nodes && nr == 1) {
			/*
			 * If there's not just one entry it would
			 * corrupt memory, better BUG_ON. In KSM
			 * context with no lock held it's not even
			 * fatal.
			 */
			BUG_ON(stable_node->hlist.first->next);

			/*
			 * There's just one entry and it is below the
			 * deduplication limit so drop the chain.
			 */
			rb_replace_node(&stable_node->node, &found->node,
					root);
			free_stable_node(stable_node);
			ksm_stable_node_chains--;
			ksm_stable_node_dups--;
			/*
			 * NOTE: the caller depends on the stable_node
			 * to be equal to stable_node_dup if the chain
			 * was collapsed.
			 */
			*_stable_node = found;
			/*
			 * Just for robustness, as stable_node is
			 * otherwise left as a stale pointer, the
			 * compiler shall optimize it away at build
			 * time.
			 */
			stable_node = NULL;
		} else if (stable_node->hlist.first != &found->hlist_dup &&
			   __is_page_sharing_candidate(found, 1)) {
			/*
			 * If the found stable_node dup can accept one
			 * more future merge (in addition to the one
			 * that is underway) and is not at the head of
			 * the chain, put it there so next search will
			 * be quicker in the !prune_stale_stable_nodes
			 * case.
			 *
			 * NOTE: it would be inaccurate to use nr > 1
			 * instead of checking the hlist.first pointer
			 * directly, because in the
			 * prune_stale_stable_nodes case "nr" isn't
			 * the position of the found dup in the chain,
			 * but the total number of dups in the chain.
			 */
			hlist_del(&found->hlist_dup);
			hlist_add_head(&found->hlist_dup,
				       &stable_node->hlist);
		}
	}

	*_stable_node_dup = found;
	return tree_page;
}
  
  static struct stable_node *stable_node_dup_any(struct stable_node *stable_node,
  					       struct rb_root *root)
  {
  	if (!is_stable_node_chain(stable_node))
  		return stable_node;
  	if (hlist_empty(&stable_node->hlist)) {
  		free_stable_node_chain(stable_node, root);
  		return NULL;
  	}
  	return hlist_entry(stable_node->hlist.first,
  			   typeof(*stable_node), hlist_dup);
  }
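
/*
 * Added note (illustrative): a stable_node "chain" is a placeholder
 * rbtree node whose hlist links several stable_node "dups", each
 * backing its own KSM page of identical content; chains exist only so
 * that no single KSM page collects more than ksm_max_page_sharing
 * rmap_items.
 */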

/*
 * Like for get_ksm_page, this function can free the *_stable_node and
 * *_stable_node_dup if the returned tree_page is NULL.
 *
 * It can also free and overwrite *_stable_node with the found
 * stable_node_dup if the chain is collapsed (in which case
 * *_stable_node will be equal to *_stable_node_dup like if the chain
 * never existed). It's up to the caller to verify tree_page is not
 * NULL before dereferencing *_stable_node or *_stable_node_dup.
 *
 * *_stable_node_dup is really a second output parameter of this
 * function and will be overwritten in all cases, the caller doesn't
 * need to initialize it.
 */
static struct page *__stable_node_chain(struct stable_node **_stable_node_dup,
					struct stable_node **_stable_node,
					struct rb_root *root,
					bool prune_stale_stable_nodes)
{
	struct stable_node *stable_node = *_stable_node;
	if (!is_stable_node_chain(stable_node)) {
		if (is_page_sharing_candidate(stable_node)) {
			*_stable_node_dup = stable_node;
			return get_ksm_page(stable_node, false);
		}
		/*
		 * _stable_node_dup set to NULL means the stable_node
		 * reached the ksm_max_page_sharing limit.
		 */
		*_stable_node_dup = NULL;
		return NULL;
	}
	return stable_node_dup(_stable_node_dup, _stable_node, root,
			       prune_stale_stable_nodes);
}

static __always_inline struct page *chain_prune(struct stable_node **s_n_d,
						struct stable_node **s_n,
						struct rb_root *root)
{
	return __stable_node_chain(s_n_d, s_n, root, true);
}

static __always_inline struct page *chain(struct stable_node **s_n_d,
					  struct stable_node *s_n,
					  struct rb_root *root)
{
	struct stable_node *old_stable_node = s_n;
	struct page *tree_page;

	tree_page = __stable_node_chain(s_n_d, &s_n, root, false);
	/* not pruning dups so s_n cannot have changed */
	VM_BUG_ON(s_n != old_stable_node);
	return tree_page;
}
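
/*
 * Added note (illustrative): stable_tree_search() goes through
 * chain_prune(), which may rewrite *s_n when it collapses a
 * single-dup chain; stable_tree_insert() goes through chain(), which
 * takes the node by value precisely because no pruning means it can
 * never change.
 */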

/*
 * stable_tree_search - search for page inside the stable tree
 *
 * This function checks if there is a page inside the stable tree
 * with identical content to the page that we are scanning right now.
 *
 * This function returns the stable tree node of identical content if found,
 * NULL otherwise.
 */
static struct page *stable_tree_search(struct page *page)
{
	int nid;
	struct rb_root *root;
	struct rb_node **new;
	struct rb_node *parent;
	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
	struct stable_node *page_node;

	page_node = page_stable_node(page);
	if (page_node && page_node->head != &migrate_nodes) {
		/* ksm page forked */
		get_page(page);
		return page;
	}

	nid = get_kpfn_nid(page_to_pfn(page));
	root = root_stable_tree + nid;
again:
	new = &root->rb_node;
	parent = NULL;

	while (*new) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(*new, struct stable_node, node);
		stable_node_any = NULL;
		tree_page = chain_prune(&stable_node_dup, &stable_node, root);
		/*
		 * NOTE: stable_node may have been freed by
		 * chain_prune() if the returned stable_node_dup is
		 * not NULL. stable_node_dup may have been inserted in
		 * the rbtree instead as a regular stable_node (in
		 * order to collapse the stable_node chain if a single
		 * stable_node dup was found in it). In such case the
		 * stable_node is overwritten by the callee to point
		 * to the stable_node_dup that was collapsed in the
		 * stable rbtree and stable_node will be equal to
		 * stable_node_dup like if the chain never existed.
		 */
		if (!stable_node_dup) {
			/*
			 * Either all stable_node dups were full in
			 * this stable_node chain, or this chain was
			 * empty and should be rb_erased.
			 */
			stable_node_any = stable_node_dup_any(stable_node,
							      root);
			if (!stable_node_any) {
				/* rb_erase just run */
				goto again;
			}
			/*
			 * Take any of the stable_node dups page of
			 * this stable_node chain to let the tree walk
			 * continue. All KSM pages belonging to the
			 * stable_node dups in a stable_node chain
			 * have the same content and they're
			 * wrprotected at all times. Any will work
			 * fine to continue the walk.
			 */
			tree_page = get_ksm_page(stable_node_any, false);
		}
		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
		if (!tree_page) {
			/*
			 * If we walked over a stale stable_node,
			 * get_ksm_page() will call rb_erase() and it
			 * may rebalance the tree from under us. So
			 * restart the search from scratch. Returning
			 * NULL would be safe too, but we'd generate
			 * false negative insertions just because some
			 * stable_node was stale.
			 */
			goto again;
		}

		ret = memcmp_pages(page, tree_page);
		put_page(tree_page);

		parent = *new;
		if (ret < 0)
			new = &parent->rb_left;
		else if (ret > 0)
			new = &parent->rb_right;
		else {
			if (page_node) {
				VM_BUG_ON(page_node->head != &migrate_nodes);
				/*
				 * Test if the migrated page should be merged
				 * into a stable node dup. If the mapcount is
				 * 1 we can migrate it with another KSM page
				 * without adding it to the chain.
				 */
				if (page_mapcount(page) > 1)
					goto chain_append;
			}

			if (!stable_node_dup) {
				/*
				 * If the stable_node is a chain and
				 * we got a payload match in memcmp
				 * but we cannot merge the scanned
				 * page in any of the existing
				 * stable_node dups because they're
				 * all full, we need to wait for the
				 * scanned page to find itself a match
				 * in the unstable tree to create a
				 * brand new KSM page to add later to
				 * the dups of this stable_node.
				 */
				return NULL;
			}

			/*
			 * Lock and unlock the stable_node's page (which
			 * might already have been migrated) so that page
			 * migration is sure to notice its raised count.
			 * It would be more elegant to return stable_node
			 * than kpage, but that involves more changes.
			 */
			tree_page = get_ksm_page(stable_node_dup, true);
			if (unlikely(!tree_page))
				/*
				 * The tree may have been rebalanced,
				 * so re-evaluate parent and new.
				 */
				goto again;
			unlock_page(tree_page);

			if (get_kpfn_nid(stable_node_dup->kpfn) !=
			    NUMA(stable_node_dup->nid)) {
				put_page(tree_page);
				goto replace;
			}
			return tree_page;
		}
	}

	if (!page_node)
		return NULL;

	list_del(&page_node->list);
	DO_NUMA(page_node->nid = nid);
	rb_link_node(&page_node->node, parent, new);
	rb_insert_color(&page_node->node, root);
out:
	if (is_page_sharing_candidate(page_node)) {
		get_page(page);
		return page;
	} else
		return NULL;

replace:
	/*
	 * If stable_node was a chain and chain_prune collapsed it,
	 * stable_node has been updated to be the new regular
	 * stable_node. A collapse of the chain is indistinguishable
	 * from the case there was no chain in the stable
	 * rbtree. Otherwise stable_node is the chain and
	 * stable_node_dup is the dup to replace.
	 */
	if (stable_node_dup == stable_node) {
		VM_BUG_ON(is_stable_node_chain(stable_node_dup));
		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
		/* there is no chain */
		if (page_node) {
			VM_BUG_ON(page_node->head != &migrate_nodes);
			list_del(&page_node->list);
			DO_NUMA(page_node->nid = nid);
			rb_replace_node(&stable_node_dup->node,
					&page_node->node,
					root);
			if (is_page_sharing_candidate(page_node))
				get_page(page);
			else
				page = NULL;
		} else {
			rb_erase(&stable_node_dup->node, root);
			page = NULL;
		}
	} else {
		VM_BUG_ON(!is_stable_node_chain(stable_node));
		__stable_node_dup_del(stable_node_dup);
		if (page_node) {
			VM_BUG_ON(page_node->head != &migrate_nodes);
			list_del(&page_node->list);
			DO_NUMA(page_node->nid = nid);
			stable_node_chain_add_dup(page_node, stable_node);
			if (is_page_sharing_candidate(page_node))
				get_page(page);
			else
				page = NULL;
		} else {
			page = NULL;
		}
	}
	stable_node_dup->head = &migrate_nodes;
	list_add(&stable_node_dup->list, stable_node_dup->head);
	return page;

chain_append:
	/* stable_node_dup could be null if it reached the limit */
	if (!stable_node_dup)
		stable_node_dup = stable_node_any;
	/*
	 * If stable_node was a chain and chain_prune collapsed it,
	 * stable_node has been updated to be the new regular
	 * stable_node. A collapse of the chain is indistinguishable
	 * from the case there was no chain in the stable
	 * rbtree. Otherwise stable_node is the chain and
	 * stable_node_dup is the dup to replace.
	 */
	if (stable_node_dup == stable_node) {
		VM_BUG_ON(is_stable_node_chain(stable_node_dup));
		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
		/* chain is missing so create it */
		stable_node = alloc_stable_node_chain(stable_node_dup,
						      root);
		if (!stable_node)
			return NULL;
	}
	/*
	 * Add this stable_node dup that was
	 * migrated to the stable_node chain
	 * of the current nid for this page
	 * content.
	 */
	VM_BUG_ON(!is_stable_node_chain(stable_node));
	VM_BUG_ON(!is_stable_node_dup(stable_node_dup));
	VM_BUG_ON(page_node->head != &migrate_nodes);
	list_del(&page_node->list);
	DO_NUMA(page_node->nid = nid);
	stable_node_chain_add_dup(page_node, stable_node);
	goto out;
}
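
/*
 * Added walk-through (illustrative): on a content hit the function
 * returns the matching KSM page with a reference held, and the caller
 * merges the scanned page into it.  The "replace" path runs when the
 * stable page itself was migrated to another NUMA node; "chain_append"
 * runs when the scanned page is a migrated KSM page parked on
 * migrate_nodes that must be re-linked as a dup of a chain.
 */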
  
  /*
 * stable_tree_insert - insert stable tree node pointing to new ksm page
 * into the stable tree.
 *
 * This function returns the stable tree node just allocated on success,
 * NULL otherwise.
 */
static struct stable_node *stable_tree_insert(struct page *kpage)
{
	int nid;
	unsigned long kpfn;
	struct rb_root *root;
	struct rb_node **new;
	struct rb_node *parent;
	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
	bool need_chain = false;

	kpfn = page_to_pfn(kpage);
	nid = get_kpfn_nid(kpfn);
	root = root_stable_tree + nid;
again:
	parent = NULL;
	new = &root->rb_node;

	while (*new) {
		struct page *tree_page;
		int ret;

		cond_resched();
		stable_node = rb_entry(*new, struct stable_node, node);
		stable_node_any = NULL;
		tree_page = chain(&stable_node_dup, stable_node, root);
		if (!stable_node_dup) {
			/*
			 * Either all stable_node dups were full in
			 * this stable_node chain, or this chain was
			 * empty and should be rb_erased.
			 */
			stable_node_any = stable_node_dup_any(stable_node,
							      root);
			if (!stable_node_any) {
				/* rb_erase just run */
				goto again;
			}
			/*
			 * Take any of the stable_node dups page of
			 * this stable_node chain to let the tree walk
			 * continue. All KSM pages belonging to the
			 * stable_node dups in a stable_node chain
			 * have the same content and they're
			 * wrprotected at all times. Any will work
			 * fine to continue the walk.
			 */
			tree_page = get_ksm_page(stable_node_any, false);
		}
		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
		if (!tree_page) {
			/*
			 * If we walked over a stale stable_node,
			 * get_ksm_page() will call rb_erase() and it
			 * may rebalance the tree from under us. So
			 * restart the search from scratch. Returning
			 * NULL would be safe too, but we'd generate
			 * false negative insertions just because some
			 * stable_node was stale.
			 */
			goto again;
		}

		ret = memcmp_pages(kpage, tree_page);
		put_page(tree_page);

		parent = *new;
		if (ret < 0)
			new = &parent->rb_left;
		else if (ret > 0)
			new = &parent->rb_right;
		else {
			need_chain = true;
			break;
		}
	}

	stable_node_dup = alloc_stable_node();
	if (!stable_node_dup)
		return NULL;

	INIT_HLIST_HEAD(&stable_node_dup->hlist);
	stable_node_dup->kpfn = kpfn;
	set_page_stable_node(kpage, stable_node_dup);
	stable_node_dup->rmap_hlist_len = 0;
	DO_NUMA(stable_node_dup->nid = nid);
	if (!need_chain) {
		rb_link_node(&stable_node_dup->node, parent, new);
		rb_insert_color(&stable_node_dup->node, root);
	} else {
		if (!is_stable_node_chain(stable_node)) {
			struct stable_node *orig = stable_node;
			/* chain is missing so create it */
			stable_node = alloc_stable_node_chain(orig, root);
			if (!stable_node) {
				free_stable_node(stable_node_dup);
				return NULL;
			}
		}
		stable_node_chain_add_dup(stable_node_dup, stable_node);
	}

	return stable_node_dup;
}
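
/*
 * Added note (illustrative): unlike stable_tree_search(), an equal
 * page found during this walk does not end the insert - it only sets
 * need_chain, so the new KSM page gets its own stable_node dup hung
 * off a chain rather than a brand new rbtree node.
 */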
  
  /*
 * unstable_tree_search_insert - search for identical page,
 * else insert rmap_item into the unstable tree.
 *
 * This function searches for a page in the unstable tree identical to the
 * page currently being scanned; and if no identical page is found in the
 * tree, we insert rmap_item as a new object into the unstable tree.
 *
 * This function returns pointer to rmap_item found to be identical
 * to the currently scanned page, NULL otherwise.
 *
 * This function does both searching and inserting, because they share
 * the same walking algorithm in an rbtree.
 */
static
struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
					      struct page *page,
					      struct page **tree_pagep)
{
	struct rb_node **new;
	struct rb_root *root;
	struct rb_node *parent = NULL;
	int nid;

	nid = get_kpfn_nid(page_to_pfn(page));
	root = root_unstable_tree + nid;
	new = &root->rb_node;

	while (*new) {
		struct rmap_item *tree_rmap_item;
		struct page *tree_page;
		int ret;

		cond_resched();
		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
		tree_page = get_mergeable_page(tree_rmap_item);
		if (!tree_page)
			return NULL;

		/*
		 * Don't substitute a ksm page for a forked page.
		 */
		if (page == tree_page) {
			put_page(tree_page);
			return NULL;
		}

		ret = memcmp_pages(page, tree_page);

		parent = *new;
		if (ret < 0) {
			put_page(tree_page);
			new = &parent->rb_left;
		} else if (ret > 0) {
			put_page(tree_page);
			new = &parent->rb_right;
		} else if (!ksm_merge_across_nodes &&
			   page_to_nid(tree_page) != nid) {
			/*
			 * If tree_page has been migrated to another NUMA node,
			 * it will be flushed out and put in the right unstable
			 * tree next time: only merge with it when across_nodes.
			 */
			put_page(tree_page);
			return NULL;
		} else {
			*tree_pagep = tree_page;
			return tree_rmap_item;
		}
	}

	rmap_item->address |= UNSTABLE_FLAG;
	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
	DO_NUMA(rmap_item->nid = nid);
	rb_link_node(&rmap_item->node, parent, new);
	rb_insert_color(&rmap_item->node, root);

	ksm_pages_unshared++;
	return NULL;
}
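
/*
 * Added note (illustrative): the unstable tree indexes still-writable
 * pages, so its sort order can rot as contents change.  That is
 * tolerated because the tree is rebuilt from scratch on every scan
 * cycle; the seqnr stamped into rmap_item->address above is how a
 * stale entry is told apart from a current one.
 */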
  
  /*
   * stable_tree_append - add another rmap_item to the linked list of
   * rmap_items hanging off a given node of the stable tree, all sharing
   * the same ksm page.
   */
  static void stable_tree_append(struct rmap_item *rmap_item,
			       struct stable_node *stable_node,
			       bool max_page_sharing_bypass)
{
	/*
	 * rmap won't find this mapping if we don't insert the
	 * rmap_item in the right stable_node
	 * duplicate. page_migration could break later if rmap breaks,
	 * so we can as well crash here. We really need to check for
	 * rmap_hlist_len == STABLE_NODE_CHAIN, but we can as well check
	 * for other negative values: an underflow detected here
	 * for the first time (and not when decreasing rmap_hlist_len)
	 * would be a sign of memory corruption in the stable_node.
	 */
	BUG_ON(stable_node->rmap_hlist_len < 0);

	stable_node->rmap_hlist_len++;
	if (!max_page_sharing_bypass)
		/* possibly non fatal but unexpected overflow, only warn */
		WARN_ON_ONCE(stable_node->rmap_hlist_len >
			     ksm_max_page_sharing);

	rmap_item->head = stable_node;
	rmap_item->address |= STABLE_FLAG;
	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);

	if (rmap_item->hlist.next)
		ksm_pages_sharing++;
	else
		ksm_pages_shared++;
}
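
/*
 * Added note (illustrative): the first rmap_item on a stable_node's
 * hlist accounts to ksm_pages_shared (a KSM page now exists for this
 * content); each further one accounts to ksm_pages_sharing (one more
 * pte points at it).  These back the pages_shared and pages_sharing
 * files under /sys/kernel/mm/ksm/.
 */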
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1957
1958
1959
1960
   * cmp_and_merge_page - first see if page can be merged into the stable tree;
   * if not, compare checksum to previous and if it's the same, see if page can
   * be inserted into the unstable tree, or merged with a page already there and
   * both transferred to the stable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1961
1962
1963
1964
1965
1966
   *
   * @page: the page that we are searching identical page to.
   * @rmap_item: the reverse mapping into the virtual address of this page
   */
  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
  {
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
1967
  	struct mm_struct *mm = rmap_item->mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1968
  	struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1969
  	struct page *tree_page = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1970
  	struct stable_node *stable_node;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1971
  	struct page *kpage;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1972
1973
  	unsigned int checksum;
  	int err;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1974
  	bool max_page_sharing_bypass = false;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1975

4146d2d67   Hugh Dickins   ksm: make !merge_...
1976
1977
1978
  	stable_node = page_stable_node(page);
  	if (stable_node) {
  		if (stable_node->head != &migrate_nodes &&
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1979
1980
1981
  		    get_kpfn_nid(READ_ONCE(stable_node->kpfn)) !=
  		    NUMA(stable_node->nid)) {
  			stable_node_dup_del(stable_node);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1982
1983
1984
1985
1986
1987
  			stable_node->head = &migrate_nodes;
  			list_add(&stable_node->list, stable_node->head);
  		}
  		if (stable_node->head != &migrate_nodes &&
  		    rmap_item->head == stable_node)
  			return;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1988
1989
1990
1991
1992
1993
  		/*
  		 * If it's a KSM fork, allow it to go over the sharing limit
  		 * without warnings.
  		 */
  		if (!is_page_sharing_candidate(stable_node))
  			max_page_sharing_bypass = true;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1994
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1995
1996
  
  	/* We first start with searching the page inside the stable tree */
62b61f611   Hugh Dickins   ksm: memory hotre...
1997
  	kpage = stable_tree_search(page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1998
1999
2000
2001
2002
2003
  	if (kpage == page && rmap_item->head == stable_node) {
  		put_page(kpage);
  		return;
  	}
  
  	remove_rmap_item_from_tree(rmap_item);
62b61f611   Hugh Dickins   ksm: memory hotre...
2004
  	if (kpage) {
08beca44d   Hugh Dickins   ksm: stable_node ...
2005
  		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2006
2007
2008
2009
2010
  		if (!err) {
  			/*
  			 * The page was successfully merged:
  			 * add its rmap_item to the stable tree.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
2011
  			lock_page(kpage);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2012
2013
  			stable_tree_append(rmap_item, page_stable_node(kpage),
  					   max_page_sharing_bypass);
5ad646880   Hugh Dickins   ksm: let shared p...
2014
  			unlock_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2015
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2016
  		put_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2017
2018
2019
2020
  		return;
  	}
  
  	/*
4035c07a8   Hugh Dickins   ksm: take keyhole...
2021
2022
2023
2024
  	 * If the hash value of the page has changed from the last time
  	 * we calculated it, this page is changing frequently: therefore we
  	 * don't want to insert it in the unstable tree, and we don't want
  	 * to waste our time searching for something identical to it there.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2025
2026
2027
2028
2029
2030
  	 */
  	checksum = calc_checksum(page);
  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return;
  	}
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2031
2032
2033
2034
2035
2036
  	/*
  	 * Same checksum as an empty page. We attempt to merge it with the
  	 * appropriate zero page if the user enabled this via sysfs.
  	 */
  	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
  		struct vm_area_struct *vma;
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
2037
2038
  		down_read(&mm->mmap_sem);
  		vma = find_mergeable_vma(mm, rmap_item->address);
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2039
2040
  		err = try_to_merge_one_page(vma, page,
  					    ZERO_PAGE(rmap_item->address));
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
2041
  		up_read(&mm->mmap_sem);
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2042
2043
2044
2045
2046
2047
2048
  		/*
  		 * In case of failure, the page was not really empty, so we
  		 * need to continue. Otherwise we're done.
  		 */
  		if (!err)
  			return;
  	}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2049
2050
  	tree_rmap_item =
  		unstable_tree_search_insert(rmap_item, page, &tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2051
  	if (tree_rmap_item) {
77da2ba06   Claudio Imbrenda   mm/ksm: fix inter...
2052
  		bool split;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2053
2054
  		kpage = try_to_merge_two_pages(rmap_item, page,
  						tree_rmap_item, tree_page);
77da2ba06   Claudio Imbrenda   mm/ksm: fix inter...
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
  		/*
  		 * If both pages we tried to merge belong to the same compound
  		 * page, then we actually ended up increasing the reference
  		 * count of the same compound page twice, and split_huge_page
  		 * failed.
  		 * Here we set a flag if that happened, and we use it later to
  		 * try split_huge_page again. Since we call put_page right
  		 * afterwards, the reference count will be correct and
  		 * split_huge_page should succeed.
  		 */
  		split = PageTransCompound(page)
  			&& compound_head(page) == compound_head(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2067
  		put_page(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2068
  		if (kpage) {
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
2069
2070
2071
2072
  			/*
  			 * The pages were successfully merged: insert new
  			 * node in the stable tree and add both rmap_items.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
2073
  			lock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2074
2075
  			stable_node = stable_tree_insert(kpage);
  			if (stable_node) {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2076
2077
2078
2079
  				stable_tree_append(tree_rmap_item, stable_node,
  						   false);
  				stable_tree_append(rmap_item, stable_node,
  						   false);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2080
  			}
5ad646880   Hugh Dickins   ksm: let shared p...
2081
  			unlock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2082

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2083
2084
2085
2086
2087
2088
  			/*
  			 * If we fail to insert the page into the stable tree,
  			 * we will have 2 virtual addresses that are pointing
  			 * to a ksm page left outside the stable tree,
  			 * in which case we need to break_cow on both.
  			 */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2089
  			if (!stable_node) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2090
2091
  				break_cow(tree_rmap_item);
  				break_cow(rmap_item);
  			}
  		} else if (split) {
  			/*
  			 * We are here if we tried to merge two pages and
  			 * failed because they both belonged to the same
  			 * compound page. We will split the page now, but no
  			 * merging will take place.
  			 * We do not want to add the cost of a full lock; if
  			 * the page is locked, it is better to skip it and
  			 * perhaps try again later.
  			 */
  			if (!trylock_page(page))
  				return;
  			split_huge_page(page);
  			unlock_page(page);
  		}
  	}
  }
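
  /*
   * Note on the "split" handling above: when page and tree_page are tails
   * of the same compound page, try_to_merge_two_pages() took two references
   * on the same head, so the split_huge_page() inside it was bound to fail.
   * Once tree_page's reference has been dropped the refcount is right
   * again, which is why the later trylock + split_huge_page() retry can
   * succeed.
   */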
  
  static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
  					    struct rmap_item **rmap_list,
  					    unsigned long addr)
  {
  	struct rmap_item *rmap_item;
  	while (*rmap_list) {
  		rmap_item = *rmap_list;
  		if ((rmap_item->address & PAGE_MASK) == addr)
  			return rmap_item;
  		if (rmap_item->address > addr)
  			break;
  		*rmap_list = rmap_item->rmap_list;
  		remove_rmap_item_from_tree(rmap_item);
  		free_rmap_item(rmap_item);
  	}
  
  	rmap_item = alloc_rmap_item();
  	if (rmap_item) {
  		/* It has already been zeroed */
  		rmap_item->mm = mm_slot->mm;
  		rmap_item->address = addr;
  		rmap_item->rmap_list = *rmap_list;
  		*rmap_list = rmap_item;
  	}
  	return rmap_item;
  }
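
  /*
   * get_next_rmap_item() relies on the scan visiting addresses in ascending
   * order: items passed over no longer have a matching page in a mergeable
   * VMA, so they are unlinked and freed in passing.  An illustrative walk,
   * with hypothetical addresses: given the list [0x1000, 0x3000], a lookup
   * at 0x2000 frees the stale item for 0x1000, stops at 0x3000, and links
   * a fresh item for 0x2000 in front of it.
   */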
  
  static struct rmap_item *scan_get_next_rmap_item(struct page **page)
  {
  	struct mm_struct *mm;
  	struct mm_slot *slot;
  	struct vm_area_struct *vma;
  	struct rmap_item *rmap_item;
  	int nid;
  
  	if (list_empty(&ksm_mm_head.mm_list))
  		return NULL;
  
  	slot = ksm_scan.mm_slot;
  	if (slot == &ksm_mm_head) {
  		/*
  		 * A number of pages can hang around indefinitely on per-cpu
  		 * pagevecs, raised page count preventing write_protect_page
  		 * from merging them.  Though it doesn't really matter much,
  		 * it is puzzling to see some stuck in pages_volatile until
  		 * other activity jostles them out, and they also prevented
  		 * LTP's KSM test from succeeding deterministically; so drain
  		 * them here (here rather than on entry to ksm_do_scan(),
  		 * so we don't IPI too often when pages_to_scan is set low).
  		 */
  		lru_add_drain_all();
  		/*
  		 * Whereas stale stable_nodes on the stable_tree itself
  		 * get pruned in the regular course of stable_tree_search(),
  		 * those moved out to the migrate_nodes list can accumulate:
  		 * so prune them once before each full scan.
  		 */
  		if (!ksm_merge_across_nodes) {
  			struct stable_node *stable_node, *next;
  			struct page *page;
  			list_for_each_entry_safe(stable_node, next,
  						 &migrate_nodes, list) {
  				page = get_ksm_page(stable_node, false);
  				if (page)
  					put_page(page);
  				cond_resched();
  			}
  		}
  		for (nid = 0; nid < ksm_nr_node_ids; nid++)
  			root_unstable_tree[nid] = RB_ROOT;
  
  		spin_lock(&ksm_mmlist_lock);
  		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
  		ksm_scan.mm_slot = slot;
  		spin_unlock(&ksm_mmlist_lock);
  		/*
  		 * Although we tested list_empty() above, a racing __ksm_exit
  		 * of the last mm on the list may have removed it since then.
  		 */
  		if (slot == &ksm_mm_head)
  			return NULL;
  next_mm:
  		ksm_scan.address = 0;
  		ksm_scan.rmap_list = &slot->rmap_list;
  	}
  
  	mm = slot->mm;
  	down_read(&mm->mmap_sem);
  	if (ksm_test_exit(mm))
  		vma = NULL;
  	else
  		vma = find_vma(mm, ksm_scan.address);
  
  	for (; vma; vma = vma->vm_next) {
  		if (!(vma->vm_flags & VM_MERGEABLE))
  			continue;
  		if (ksm_scan.address < vma->vm_start)
  			ksm_scan.address = vma->vm_start;
  		if (!vma->anon_vma)
  			ksm_scan.address = vma->vm_end;
  
  		while (ksm_scan.address < vma->vm_end) {
  			if (ksm_test_exit(mm))
  				break;
  			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
  			if (IS_ERR_OR_NULL(*page)) {
  				ksm_scan.address += PAGE_SIZE;
  				cond_resched();
  				continue;
  			}
  			if (PageAnon(*page)) {
  				flush_anon_page(vma, *page, ksm_scan.address);
  				flush_dcache_page(*page);
  				rmap_item = get_next_rmap_item(slot,
  					ksm_scan.rmap_list, ksm_scan.address);
  				if (rmap_item) {
  					ksm_scan.rmap_list =
  							&rmap_item->rmap_list;
  					ksm_scan.address += PAGE_SIZE;
  				} else
  					put_page(*page);
  				up_read(&mm->mmap_sem);
  				return rmap_item;
  			}
  			put_page(*page);
  			ksm_scan.address += PAGE_SIZE;
  			cond_resched();
  		}
  	}
  	if (ksm_test_exit(mm)) {
  		ksm_scan.address = 0;
  		ksm_scan.rmap_list = &slot->rmap_list;
  	}
  	/*
  	 * Nuke all the rmap_items that are above this current rmap:
  	 * because there were no VM_MERGEABLE vmas with such addresses.
  	 */
  	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
  
  	spin_lock(&ksm_mmlist_lock);
  	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
  						struct mm_slot, mm_list);
  	if (ksm_scan.address == 0) {
  		/*
  		 * We've completed a full scan of all vmas, holding mmap_sem
  		 * throughout, and found no VM_MERGEABLE: so do the same as
  		 * __ksm_exit does to remove this mm from all our lists now.
  		 * This applies either when cleaning up after __ksm_exit
  		 * (but beware: we can reach here even before __ksm_exit),
  		 * or when all VM_MERGEABLE areas have been unmapped (and
  		 * mmap_sem then protects against race with MADV_MERGEABLE).
  		 */
  		hash_del(&slot->link);
  		list_del(&slot->mm_list);
  		spin_unlock(&ksm_mmlist_lock);
  		free_mm_slot(slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		up_read(&mm->mmap_sem);
  		mmdrop(mm);
  	} else {
  		up_read(&mm->mmap_sem);
  		/*
  		 * up_read(&mm->mmap_sem) first because after
  		 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
  		 * already have been freed under us by __ksm_exit()
  		 * because the "mm_slot" is still hashed and
  		 * ksm_scan.mm_slot doesn't point to it anymore.
  		 */
  		spin_unlock(&ksm_mmlist_lock);
  	}
  
  	/* Repeat until we've completed scanning the whole list */
  	slot = ksm_scan.mm_slot;
  	if (slot != &ksm_mm_head)
  		goto next_mm;
  	ksm_scan.seqnr++;
  	return NULL;
  }
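
  /*
   * scan_get_next_rmap_item() thus implements a resumable cursor: the
   * (mm_slot, address) pair in ksm_scan names the next anon page to
   * examine, the page is returned with the follow_page(FOLL_GET) reference
   * still held, and ksm_scan.seqnr counts completed passes over the whole
   * mm list.  The lru_add_drain_all() and unstable tree reset above only
   * run when the cursor wraps around to ksm_mm_head.
   */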
  
  /**
   * ksm_do_scan  - the ksm scanner main worker function.
   * @scan_npages:  number of pages we want to scan before we return.
   */
  static void ksm_do_scan(unsigned int scan_npages)
  {
  	struct rmap_item *rmap_item;
  	struct page *uninitialized_var(page);

  	while (scan_npages-- && likely(!freezing(current))) {
  		cond_resched();
  		rmap_item = scan_get_next_rmap_item(&page);
  		if (!rmap_item)
  			return;
  		cmp_and_merge_page(page, rmap_item);
  		put_page(page);
  	}
  }
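
  /*
   * Every page handed out by scan_get_next_rmap_item() still carries the
   * FOLL_GET reference, so ksm_do_scan() must drop it with put_page() once
   * cmp_and_merge_page() is done with the page.
   */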
  static int ksmd_should_run(void)
  {
  	return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
  }
  static int ksm_scan_thread(void *nothing)
  {
  	set_freezable();
  	set_user_nice(current, 5);
  
  	while (!kthread_should_stop()) {
  		mutex_lock(&ksm_thread_mutex);
  		wait_while_offlining();
  		if (ksmd_should_run())
  			ksm_do_scan(ksm_thread_pages_to_scan);
  		mutex_unlock(&ksm_thread_mutex);
  		try_to_freeze();
  		if (ksmd_should_run()) {
  			schedule_timeout_interruptible(
  				msecs_to_jiffies(ksm_thread_sleep_millisecs));
  		} else {
  			wait_event_freezable(ksm_thread_wait,
  				ksmd_should_run() || kthread_should_stop());
  		}
  	}
  	return 0;
  }
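
  /*
   * So ksmd alternates between scanning a batch of ksm_thread_pages_to_scan
   * pages and sleeping for ksm_thread_sleep_millisecs; when there is
   * nothing to do (KSM_RUN_MERGE clear, or no mms registered) it parks in
   * wait_event_freezable() until __ksm_enter() or run_store() wakes it.
   */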
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  		unsigned long end, int advice, unsigned long *vm_flags)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	int err;
  
  	switch (advice) {
  	case MADV_MERGEABLE:
  		/*
  		 * Be somewhat over-protective for now!
  		 */
  		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
  				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
  				 VM_HUGETLB | VM_MIXEDMAP))
  			return 0;		/* just ignore the advice */
  		if (vma_is_dax(vma))
  			return 0;
  #ifdef VM_SAO
  		if (*vm_flags & VM_SAO)
  			return 0;
  #endif
  #ifdef VM_SPARC_ADI
  		if (*vm_flags & VM_SPARC_ADI)
  			return 0;
  #endif

  		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
  			err = __ksm_enter(mm);
  			if (err)
  				return err;
  		}
  
  		*vm_flags |= VM_MERGEABLE;
  		break;
  
  	case MADV_UNMERGEABLE:
  		if (!(*vm_flags & VM_MERGEABLE))
  			return 0;		/* just ignore the advice */
  		if (vma->anon_vma) {
  			err = unmerge_ksm_pages(vma, start, end);
  			if (err)
  				return err;
  		}
  
  		*vm_flags &= ~VM_MERGEABLE;
  		break;
  	}
  
  	return 0;
  }
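
  /*
   * ksm_madvise() is reached from userspace through madvise(2).  A minimal
   * sketch of a process opting a mapping in and out of KSM (illustrative
   * only; addr and len stand for the caller's own anonymous mapping):
   *
   *	madvise(addr, len, MADV_MERGEABLE);	sets VM_MERGEABLE
   *	madvise(addr, len, MADV_UNMERGEABLE);	unmerges, breaking COW
   *
   * The flag checks above make the advice a silent no-op on unsuitable
   * VMAs (VM_SHARED, VM_PFNMAP, DAX and so on) rather than an error.
   */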
  
  int __ksm_enter(struct mm_struct *mm)
  {
  	struct mm_slot *mm_slot;
  	int needs_wakeup;
  
  	mm_slot = alloc_mm_slot();
  	if (!mm_slot)
  		return -ENOMEM;
  	/* Check ksm_run too?  Would need tighter locking */
  	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
  	spin_lock(&ksm_mmlist_lock);
  	insert_to_mm_slots_hash(mm, mm_slot);
  	/*
  	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
  	 * insert just behind the scanning cursor, to let the area settle
  	 * down a little; when fork is followed by immediate exec, we don't
  	 * want ksmd to waste time setting up and tearing down an rmap_list.
  	 *
  	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
  	 * scanning cursor, otherwise KSM pages in newly forked mms will be
  	 * missed: then we might as well insert at the end of the list.
  	 */
  	if (ksm_run & KSM_RUN_UNMERGE)
  		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
  	else
  		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
  	spin_unlock(&ksm_mmlist_lock);
  	set_bit(MMF_VM_MERGEABLE, &mm->flags);
  	mmgrab(mm);
  
  	if (needs_wakeup)
  		wake_up_interruptible(&ksm_thread_wait);
  	return 0;
  }
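
  /*
   * The mmgrab() above keeps the mm_struct alive while it sits on ksmd's
   * list; it is paired with the mmdrop() in __ksm_exit() for the easy
   * case, or with the one in scan_get_next_rmap_item() when ksmd itself
   * retires a fully scanned mm with no mergeable areas left.
   */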
  void __ksm_exit(struct mm_struct *mm)
  {
  	struct mm_slot *mm_slot;
  	int easy_to_free = 0;

  	/*
  	 * This process is exiting: if it's straightforward (as is the
  	 * case when ksmd was never running), free mm_slot immediately.
  	 * But if it's at the cursor or has rmap_items linked to it, use
  	 * mmap_sem to synchronize with any break_cows before pagetables
  	 * are freed, and leave the mm_slot on the list for ksmd to free.
  	 * Beware: ksm may already have noticed it exiting and freed the slot.
  	 */

  	spin_lock(&ksm_mmlist_lock);
  	mm_slot = get_mm_slot(mm);
  	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
  		if (!mm_slot->rmap_list) {
  			hash_del(&mm_slot->link);
  			list_del(&mm_slot->mm_list);
  			easy_to_free = 1;
  		} else {
  			list_move(&mm_slot->mm_list,
  				  &ksm_scan.mm_slot->mm_list);
  		}
  	}
  	spin_unlock(&ksm_mmlist_lock);
  	if (easy_to_free) {
  		free_mm_slot(mm_slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		mmdrop(mm);
  	} else if (mm_slot) {
  		down_write(&mm->mmap_sem);
  		up_write(&mm->mmap_sem);
  	}
  }
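
  /*
   * The empty down_write()/up_write() pair above is deliberate: taking
   * mmap_sem for write cannot succeed until every current reader, in
   * particular a break_cow() running in ksmd, has finished, so the exiting
   * mm's page tables are not freed while ksmd is still touching them.
   */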
  struct page *ksm_might_need_to_copy(struct page *page,
  			struct vm_area_struct *vma, unsigned long address)
  {
  	struct anon_vma *anon_vma = page_anon_vma(page);
  	struct page *new_page;
  	if (PageKsm(page)) {
  		if (page_stable_node(page) &&
  		    !(ksm_run & KSM_RUN_UNMERGE))
  			return page;	/* no need to copy it */
  	} else if (!anon_vma) {
  		return page;		/* no need to copy it */
  	} else if (anon_vma->root == vma->anon_vma->root &&
  		 page->index == linear_page_index(vma, address)) {
  		return page;		/* still no need to copy it */
  	}
  	if (!PageUptodate(page))
  		return page;		/* let do_swap_page report the error */
  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
  	if (new_page) {
  		copy_user_highpage(new_page, page, address, vma);
  
  		SetPageDirty(new_page);
  		__SetPageUptodate(new_page);
  		__SetPageLocked(new_page);
  	}
  	return new_page;
  }
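
  /*
   * ksm_might_need_to_copy() serves swap-in: a page brought back from swap
   * may turn out to be a KSM page shared with other processes, and callers
   * such as do_swap_page() must then map a fresh private copy.  The page
   * is reused only when that is provably safe: still a stable KSM page,
   * or anonymous in this very vma at this very address.
   */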
  void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
  {
  	struct stable_node *stable_node;
  	struct rmap_item *rmap_item;
  	int search_new_forks = 0;
  	VM_BUG_ON_PAGE(!PageKsm(page), page);
  
  	/*
  	 * Rely on the page lock to protect against concurrent modifications
  	 * to that page's node of the stable tree.
  	 */
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
  
  	stable_node = page_stable_node(page);
  	if (!stable_node)
  		return;
  again:
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
  		struct anon_vma *anon_vma = rmap_item->anon_vma;
  		struct anon_vma_chain *vmac;
  		struct vm_area_struct *vma;
  		cond_resched();
  		anon_vma_lock_read(anon_vma);
  		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
  					       0, ULONG_MAX) {
  			unsigned long addr;
  			cond_resched();
  			vma = vmac->vma;
  
  			/* Ignore the stable/unstable/sqnr flags */
  			addr = rmap_item->address & ~KSM_FLAG_MASK;
  
  			if (addr < vma->vm_start || addr >= vma->vm_end)
  				continue;
  			/*
  			 * Initially we examine only the vma which covers this
  			 * rmap_item; but later, if there is still work to do,
  			 * we examine covering vmas in other mms: in case they
  			 * were forked from the original since ksmd passed.
  			 */
  			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
  				continue;
  			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  				continue;
  			if (!rwc->rmap_one(page, vma, addr, rwc->arg)) {
  				anon_vma_unlock_read(anon_vma);
  				return;
  			}
  			if (rwc->done && rwc->done(page)) {
  				anon_vma_unlock_read(anon_vma);
  				return;
  			}
  		}
  		anon_vma_unlock_read(anon_vma);
  	}
  	if (!search_new_forks++)
  		goto again;
  }
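
  /*
   * Note the two-pass structure above: the first pass (search_new_forks
   * still 0) visits only the vma each rmap_item was created in; the second
   * widens to every vma sharing the same anon_vma root, to catch mappings
   * forked off after ksmd recorded the item.
   */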
  #ifdef CONFIG_MIGRATION
  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
  {
  	struct stable_node *stable_node;
  	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
  	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
  	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
  
  	stable_node = page_stable_node(newpage);
  	if (stable_node) {
  		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
  		stable_node->kpfn = page_to_pfn(newpage);
  		/*
  		 * newpage->mapping was set in advance; now we need smp_wmb()
  		 * to make sure that the new stable_node->kpfn is visible
  		 * to get_ksm_page() before it can see that oldpage->mapping
  		 * has gone stale (or that PageSwapCache has been cleared).
  		 */
  		smp_wmb();
  		set_page_stable_node(oldpage, NULL);
  	}
  }
  #endif /* CONFIG_MIGRATION */
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static void wait_while_offlining(void)
  {
  	while (ksm_run & KSM_RUN_OFFLINE) {
  		mutex_unlock(&ksm_thread_mutex);
  		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
  			    TASK_UNINTERRUPTIBLE);
  		mutex_lock(&ksm_thread_mutex);
  	}
  }
  static bool stable_node_dup_remove_range(struct stable_node *stable_node,
  					 unsigned long start_pfn,
  					 unsigned long end_pfn)
  {
  	if (stable_node->kpfn >= start_pfn &&
  	    stable_node->kpfn < end_pfn) {
  		/*
  		 * Don't get_ksm_page, page has already gone:
  		 * which is why we keep kpfn instead of page*
  		 */
  		remove_node_from_stable_tree(stable_node);
  		return true;
  	}
  	return false;
  }
  
  static bool stable_node_chain_remove_range(struct stable_node *stable_node,
  					   unsigned long start_pfn,
  					   unsigned long end_pfn,
  					   struct rb_root *root)
  {
  	struct stable_node *dup;
  	struct hlist_node *hlist_safe;
  
  	if (!is_stable_node_chain(stable_node)) {
  		VM_BUG_ON(is_stable_node_dup(stable_node));
  		return stable_node_dup_remove_range(stable_node, start_pfn,
  						    end_pfn);
  	}
  
  	hlist_for_each_entry_safe(dup, hlist_safe,
  				  &stable_node->hlist, hlist_dup) {
  		VM_BUG_ON(!is_stable_node_dup(dup));
  		stable_node_dup_remove_range(dup, start_pfn, end_pfn);
  	}
  	if (hlist_empty(&stable_node->hlist)) {
  		free_stable_node_chain(stable_node, root);
  		return true; /* notify caller that tree was rebalanced */
  	} else
  		return false;
  }
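
  /*
   * A stable tree node is either a single dup or the head of a chain whose
   * hlist holds the dups, so removal on hot-unplug has two shapes: prune
   * the one dup, or walk the chain's dups and free the chain head once its
   * hlist drains.  The "tree was rebalanced" return value makes
   * ksm_check_stable_tree() below restart from rb_first().
   */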
  static void ksm_check_stable_tree(unsigned long start_pfn,
  				  unsigned long end_pfn)
  {
  	struct stable_node *stable_node, *next;
  	struct rb_node *node;
  	int nid;

  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		node = rb_first(root_stable_tree + nid);
  		while (node) {
  			stable_node = rb_entry(node, struct stable_node, node);
  			if (stable_node_chain_remove_range(stable_node,
  							   start_pfn, end_pfn,
  							   root_stable_tree +
  							   nid))
  				node = rb_first(root_stable_tree + nid);
  			else
  				node = rb_next(node);
  			cond_resched();
  		}
  	}
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
  		if (stable_node->kpfn >= start_pfn &&
  		    stable_node->kpfn < end_pfn)
  			remove_node_from_stable_tree(stable_node);
  		cond_resched();
  	}
  }
  
  static int ksm_memory_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
  
  	switch (action) {
  	case MEM_GOING_OFFLINE:
  		/*
  		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
  		 * and remove_all_stable_nodes() while memory is going offline:
  		 * it is unsafe for them to touch the stable tree at this time.
  		 * But unmerge_ksm_pages(), rmap lookups and other entry points
  		 * which do not need the ksm_thread_mutex are all safe.
  		 */
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run |= KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
  		break;
  
  	case MEM_OFFLINE:
  		/*
  		 * Most of the work is done by page migration; but there might
  		 * be a few stable_nodes left over, still pointing to struct
  		 * pages which have been offlined: prune those from the tree,
  		 * otherwise get_ksm_page() might later try to access a
  		 * non-existent struct page.
  		 */
  		ksm_check_stable_tree(mn->start_pfn,
  				      mn->start_pfn + mn->nr_pages);
  		/* fallthrough */
  
  	case MEM_CANCEL_OFFLINE:
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run &= ~KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
  
  		smp_mb();	/* wake_up_bit advises this */
  		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
  		break;
  	}
  	return NOTIFY_OK;
  }
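
  /*
   * Sequence of the notifier above: MEM_GOING_OFFLINE sets KSM_RUN_OFFLINE
   * so that ksmd and the unmerge paths stall in wait_while_offlining();
   * MEM_OFFLINE prunes stable nodes whose pages have gone away and then,
   * like MEM_CANCEL_OFFLINE, clears the bit and wakes the bit-waiters.
   */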
  #else
  static void wait_while_offlining(void)
  {
  }
  #endif /* CONFIG_MEMORY_HOTREMOVE */
  #ifdef CONFIG_SYSFS
  /*
   * This all compiles without CONFIG_SYSFS, but is a waste of space.
   */
  #define KSM_ATTR_RO(_name) \
  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KSM_ATTR(_name) \
  	static struct kobj_attribute _name##_attr = \
  		__ATTR(_name, 0644, _name##_show, _name##_store)
  
  static ssize_t sleep_millisecs_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_sleep_millisecs);
  }
  
  static ssize_t sleep_millisecs_store(struct kobject *kobj,
  				     struct kobj_attribute *attr,
  				     const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;
  	err = kstrtoul(buf, 10, &msecs);
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_sleep_millisecs = msecs;
  
  	return count;
  }
  KSM_ATTR(sleep_millisecs);
  
  static ssize_t pages_to_scan_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_pages_to_scan);
  }
  
  static ssize_t pages_to_scan_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long nr_pages;
  	err = kstrtoul(buf, 10, &nr_pages);
  	if (err || nr_pages > UINT_MAX)
  		return -EINVAL;
  
  	ksm_thread_pages_to_scan = nr_pages;
  
  	return count;
  }
  KSM_ATTR(pages_to_scan);
  
  static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
  			char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_run);
  }
  
  static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
  			 const char *buf, size_t count)
  {
  	int err;
  	unsigned long flags;
  	err = kstrtoul(buf, 10, &flags);
  	if (err || flags > UINT_MAX)
  		return -EINVAL;
  	if (flags > KSM_RUN_UNMERGE)
  		return -EINVAL;
  
  	/*
  	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
  	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
  	 * breaking COW to free the pages_shared (but leaves mm_slots
  	 * on the list for when ksmd may be set running again).
  	 */
  
  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_run != flags) {
  		ksm_run = flags;
  		if (flags & KSM_RUN_UNMERGE) {
  			set_current_oom_origin();
  			err = unmerge_and_remove_all_rmap_items();
  			clear_current_oom_origin();
  			if (err) {
  				ksm_run = KSM_RUN_STOP;
  				count = err;
  			}
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	if (flags & KSM_RUN_MERGE)
  		wake_up_interruptible(&ksm_thread_wait);
  
  	return count;
  }
  KSM_ATTR(run);
  #ifdef CONFIG_NUMA
  static ssize_t merge_across_nodes_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_merge_across_nodes);
  }
  
  static ssize_t merge_across_nodes_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long knob;
  
  	err = kstrtoul(buf, 10, &knob);
  	if (err)
  		return err;
  	if (knob > 1)
  		return -EINVAL;
  
  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_merge_across_nodes != knob) {
  		if (ksm_pages_shared || remove_all_stable_nodes())
  			err = -EBUSY;
  		else if (root_stable_tree == one_stable_tree) {
  			struct rb_root *buf;
  			/*
  			 * This is the first time that we switch away from the
  			 * default of merging across nodes: must now allocate
  			 * a buffer to hold as many roots as may be needed.
  			 * Allocate stable and unstable together:
  			 * MAXSMP NODES_SHIFT 10 will use 16kB.
  			 */
  			buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf),
  				      GFP_KERNEL);
  			/* Assume kcalloc's zero-filled rb_roots equal RB_ROOT, i.e. empty trees */
  			if (!buf)
  				err = -ENOMEM;
  			else {
  				root_stable_tree = buf;
  				root_unstable_tree = buf + nr_node_ids;
  				/* Stable tree is empty but not the unstable */
  				root_unstable_tree[0] = one_unstable_tree[0];
  			}
  		}
  		if (!err) {
  			ksm_merge_across_nodes = knob;
  			ksm_nr_node_ids = knob ? 1 : nr_node_ids;
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	return err ? err : count;
  }
  KSM_ATTR(merge_across_nodes);
  #endif
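
  /*
   * Note that merge_across_nodes can only be changed while nothing is
   * merged: if ksm_pages_shared is non-zero, or stable nodes remain that
   * cannot be dropped, the store fails with -EBUSY, since one global
   * stable tree cannot be split into per-node trees (or merged back) in
   * place.
   */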
  static ssize_t use_zero_pages_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_use_zero_pages);
  }
  static ssize_t use_zero_pages_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	bool value;
  
  	err = kstrtobool(buf, &value);
  	if (err)
  		return -EINVAL;
  
  	ksm_use_zero_pages = value;
  
  	return count;
  }
  KSM_ATTR(use_zero_pages);
  static ssize_t max_page_sharing_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_max_page_sharing);
  }
  
  static ssize_t max_page_sharing_store(struct kobject *kobj,
  				      struct kobj_attribute *attr,
  				      const char *buf, size_t count)
  {
  	int err;
  	int knob;
  
  	err = kstrtoint(buf, 10, &knob);
  	if (err)
  		return err;
  	/*
  	 * When a KSM page is created it is shared by 2 mappings. This
  	 * being a signed comparison, it implicitly verifies it's not
  	 * negative.
  	 */
  	if (knob < 2)
  		return -EINVAL;
  
  	if (READ_ONCE(ksm_max_page_sharing) == knob)
  		return count;
  
  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_max_page_sharing != knob) {
  		if (ksm_pages_shared || remove_all_stable_nodes())
  			err = -EBUSY;
  		else
  			ksm_max_page_sharing = knob;
  	}
  	mutex_unlock(&ksm_thread_mutex);
  
  	return err ? err : count;
  }
  KSM_ATTR(max_page_sharing);
  static ssize_t pages_shared_show(struct kobject *kobj,
  				 struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_shared);
  }
  KSM_ATTR_RO(pages_shared);
  
  static ssize_t pages_sharing_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_sharing);
b40282603   Hugh Dickins   ksm: rename kerne...
2927
2928
  }
  KSM_ATTR_RO(pages_sharing);
  static ssize_t pages_unshared_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_unshared);
  }
  KSM_ATTR_RO(pages_unshared);
  
  static ssize_t pages_volatile_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	long ksm_pages_volatile;
  
  	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
  				- ksm_pages_sharing - ksm_pages_unshared;
  	/*
  	 * It was not worth any locking to calculate that statistic,
  	 * but it might therefore sometimes be negative: conceal that.
  	 */
  	if (ksm_pages_volatile < 0)
  		ksm_pages_volatile = 0;
  	return sprintf(buf, "%ld
  ", ksm_pages_volatile);
  }
  KSM_ATTR_RO(pages_volatile);
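
  /*
   * Worked example for pages_volatile, with illustrative numbers: 1000
   * rmap_items minus 100 pages_shared, 600 pages_sharing and 200
   * pages_unshared leaves 100 volatile pages - pages ksmd has seen whose
   * contents keep changing, so they earn no place in either tree.
   */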
  static ssize_t stable_node_dups_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_stable_node_dups);
  }
  KSM_ATTR_RO(stable_node_dups);
  
  static ssize_t stable_node_chains_show(struct kobject *kobj,
  				       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_stable_node_chains);
  }
  KSM_ATTR_RO(stable_node_chains);
  
  static ssize_t
  stable_node_chains_prune_millisecs_show(struct kobject *kobj,
  					struct kobj_attribute *attr,
  					char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_stable_node_chains_prune_millisecs);
  }
  
  static ssize_t
  stable_node_chains_prune_millisecs_store(struct kobject *kobj,
  					 struct kobj_attribute *attr,
  					 const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;
  
  	err = kstrtoul(buf, 10, &msecs);
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;
  
  	ksm_stable_node_chains_prune_millisecs = msecs;
  
  	return count;
  }
  KSM_ATTR(stable_node_chains_prune_millisecs);
  static ssize_t full_scans_show(struct kobject *kobj,
  			       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_scan.seqnr);
  }
  KSM_ATTR_RO(full_scans);
  static struct attribute *ksm_attrs[] = {
  	&sleep_millisecs_attr.attr,
  	&pages_to_scan_attr.attr,
  	&run_attr.attr,
  	&pages_shared_attr.attr,
  	&pages_sharing_attr.attr,
  	&pages_unshared_attr.attr,
  	&pages_volatile_attr.attr,
  	&full_scans_attr.attr,
  #ifdef CONFIG_NUMA
  	&merge_across_nodes_attr.attr,
  #endif
  	&max_page_sharing_attr.attr,
  	&stable_node_chains_attr.attr,
  	&stable_node_dups_attr.attr,
  	&stable_node_chains_prune_millisecs_attr.attr,
  	&use_zero_pages_attr.attr,
  	NULL,
  };
  static const struct attribute_group ksm_attr_group = {
  	.attrs = ksm_attrs,
  	.name = "ksm",
  };
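
  /*
   * Registered on mm_kobj in ksm_init() below, this group appears as
   * /sys/kernel/mm/ksm/, whose run, pages_to_scan and sleep_millisecs
   * files are the usual way ksmd is enabled and tuned at runtime.
   */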
  #endif /* CONFIG_SYSFS */
  
  static int __init ksm_init(void)
  {
  	struct task_struct *ksm_thread;
  	int err;
  	/* The correct value depends on page size and endianness */
  	zero_checksum = calc_checksum(ZERO_PAGE(0));
  	/* Default to false for backwards compatibility */
  	ksm_use_zero_pages = false;
  	err = ksm_slab_init();
  	if (err)
  		goto out;
  	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
  	if (IS_ERR(ksm_thread)) {
  		pr_err("ksm: creating kthread failed
  ");
  		err = PTR_ERR(ksm_thread);
  		goto out_free;
  	}
  #ifdef CONFIG_SYSFS
  	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
  	if (err) {
  		pr_err("ksm: register sysfs failed
  ");
  		kthread_stop(ksm_thread);
  		goto out_free;
  	}
  #else
  	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */
  #endif /* CONFIG_SYSFS */

  #ifdef CONFIG_MEMORY_HOTREMOVE
  	/* There is no significance to this priority 100 */
  	hotplug_memory_notifier(ksm_memory_callback, 100);
  #endif
  	return 0;
  out_free:
  	ksm_slab_free();
  out:
  	return err;
  }
  subsys_initcall(ksm_init);