Blame view

mm/ksm.c 89.5 KB
7a338472f   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-only
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
3
4
5
6
7
   * Memory merging support.
   *
   * This code enables dynamic sharing of identical pages found in different
   * memory areas, even if they are not shared by fork().
   *
36b2528dc   Izik Eidus   ksm: change copyr...
8
   * Copyright (C) 2008-2009 Red Hat, Inc.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
9
10
11
12
   * Authors:
   *	Izik Eidus
   *	Andrea Arcangeli
   *	Chris Wright
36b2528dc   Izik Eidus   ksm: change copyr...
13
   *	Hugh Dickins
f8af4da3b   Hugh Dickins   ksm: the mm inter...
14
15
16
   */
  
  #include <linux/errno.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
17
18
  #include <linux/mm.h>
  #include <linux/fs.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
19
  #include <linux/mman.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
20
  #include <linux/sched.h>
6e84f3152   Ingo Molnar   sched/headers: Pr...
21
  #include <linux/sched/mm.h>
f7ccbae45   Ingo Molnar   sched/headers: Pr...
22
  #include <linux/sched/coredump.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
23
24
25
26
  #include <linux/rwsem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
  #include <linux/spinlock.h>
59e1a2f4b   Timofey Titovets   ksm: replace jhas...
27
  #include <linux/xxhash.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
28
29
30
31
32
  #include <linux/delay.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>
  #include <linux/slab.h>
  #include <linux/rbtree.h>
62b61f611   Hugh Dickins   ksm: memory hotre...
33
  #include <linux/memory.h>
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
34
  #include <linux/mmu_notifier.h>
2c6854fda   Izik Eidus   ksm: change defau...
35
  #include <linux/swap.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
36
  #include <linux/ksm.h>
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
37
  #include <linux/hashtable.h>
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
38
  #include <linux/freezer.h>
72788c385   David Rientjes   oom: replace PF_O...
39
  #include <linux/oom.h>
90bd6fd31   Petr Holasek   ksm: allow trees ...
40
  #include <linux/numa.h>
f8af4da3b   Hugh Dickins   ksm: the mm inter...
41

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
42
  #include <asm/tlbflush.h>
73848b468   Hugh Dickins   ksm: fix mlockfre...
43
  #include "internal.h"
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
44

e850dcf53   Hugh Dickins   ksm: trivial tidyups
45
46
47
48
49
50
51
  /*
   * NUMA(x) evaluates to x on CONFIG_NUMA builds and to the constant 0
   * otherwise (x is then not evaluated at all).  DO_NUMA(x) executes x as a
   * statement on CONFIG_NUMA builds and expands to an empty statement
   * otherwise.
   */
  #ifdef CONFIG_NUMA
  #define NUMA(x)		(x)
  #define DO_NUMA(x)	do { (x); } while (0)
  #else
  #define NUMA(x)		(0)
  #define DO_NUMA(x)	do { } while (0)
  #endif
5a2ca3efe   Mike Rapoport   mm/ksm: docs: ext...
52
53
54
  /**
   * DOC: Overview
   *
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
   * A few notes about the KSM scanning process,
   * to make it easier to understand the data structures below:
   *
   * In order to reduce excessive scanning, KSM sorts the memory pages by their
   * contents into a data structure that holds pointers to the pages' locations.
   *
   * Since the contents of the pages may change at any moment, KSM cannot just
   * insert the pages into a normal sorted tree and expect it to find anything.
   * Therefore KSM uses two data structures - the stable and the unstable tree.
   *
   * The stable tree holds pointers to all the merged pages (ksm pages), sorted
   * by their contents.  Because each such page is write-protected, searching on
   * this tree is fully assured to be working (except when pages are unmapped),
   * and therefore this tree is called the stable tree.
   *
5a2ca3efe   Mike Rapoport   mm/ksm: docs: ext...
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
   * The stable tree node includes information required for reverse
   * mapping from a KSM page to virtual addresses that map this page.
   *
   * In order to avoid large latencies of the rmap walks on KSM pages,
   * KSM maintains two types of nodes in the stable tree:
   *
   * * the regular nodes that keep the reverse mapping structures in a
   *   linked list
   * * the "chains" that link nodes ("dups") that represent the same
   *   write protected memory content, but each "dup" corresponds to a
   *   different KSM page copy of that content
   *
   * Internally, the regular nodes, "dups" and "chains" are represented
   * using the same :c:type:`struct stable_node` structure.
   *
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
   * In addition to the stable tree, KSM uses a second data structure called the
   * unstable tree: this tree holds pointers to pages which have been found to
   * be "unchanged for a period of time".  The unstable tree sorts these pages
   * by their contents, but since they are not write-protected, KSM cannot rely
   * upon the unstable tree to work correctly - the unstable tree is liable to
   * be corrupted as its contents are modified, and so it is called unstable.
   *
   * KSM solves this problem by several techniques:
   *
   * 1) The unstable tree is flushed every time KSM completes scanning all
   *    memory areas, and then the tree is rebuilt again from the beginning.
   * 2) KSM will only insert into the unstable tree, pages whose hash value
   *    has not changed since the previous scan of all memory areas.
   * 3) The unstable tree is a red-black tree - so its balancing is based on the
   *    colors of the nodes and not on their contents, assuring that even when
   *    the tree gets "corrupted" it won't get out of balance, so scanning time
   *    remains the same (also, searching and inserting nodes in an rbtree uses
   *    the same algorithm, so we have no overhead when we flush and rebuild).
   * 4) KSM never flushes the stable tree, which means that even if it were to
   *    take 10 attempts to find a page in the unstable tree, once it is found,
   *    it is secured in the stable tree.  (When we scan a new page, we first
   *    compare it against the stable tree, and then against the unstable tree.)
8fdb3dbf0   Hugh Dickins   ksm: add some com...
107
108
109
   *
   * If the merge_across_nodes tunable is unset, then KSM maintains multiple
   * stable trees and multiple unstable trees: one of each for each NUMA node.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
110
111
112
113
114
115
   */
  
  /**
   * struct mm_slot - ksm information per mm that is being scanned
   * @link: link to the mm_slots hash list
   * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
6514d511d   Hugh Dickins   ksm: singly-linke...
116
   * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
117
118
119
120
121
   * @mm: the mm that this information is valid for
   */
  struct mm_slot {
  	struct hlist_node link;		/* entry in mm_slots_hash, keyed by the mm pointer */
  	struct list_head mm_list;	/* entry on the list rooted at ksm_mm_head */
6514d511d   Hugh Dickins   ksm: singly-linke...
122
  	struct rmap_item *rmap_list;	/* first rmap_item of this mm's singly-linked list */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
123
124
125
126
127
128
129
  	struct mm_struct *mm;		/* the mm this slot describes */
  };
  
  /**
   * struct ksm_scan - cursor for scanning
   * @mm_slot: the current mm_slot we are scanning
   * @address: the next address inside that to be scanned
6514d511d   Hugh Dickins   ksm: singly-linke...
130
   * @rmap_list: link to the next rmap to be scanned in the rmap_list
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
131
132
133
134
135
136
137
   * @seqnr: count of completed full scans (needed when removing unstable node)
   *
   * There is only the one ksm_scan instance of this cursor structure.
   */
  struct ksm_scan {
  	struct mm_slot *mm_slot;	/* slot being scanned; initially &ksm_mm_head */
  	unsigned long address;		/* next address to scan inside that mm */
6514d511d   Hugh Dickins   ksm: singly-linke...
138
  	struct rmap_item **rmap_list;	/* link to the next rmap_item to be scanned */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
139
140
141
142
  	unsigned long seqnr;		/* count of completed full scans */
  };
  
  /**
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
143
144
   * struct stable_node - node of the stable rbtree
   * @node: rb node of this ksm page in the stable tree
4146d2d67   Hugh Dickins   ksm: make !merge_...
145
   * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
146
   * @hlist_dup: linked into the stable_node->hlist with a stable_node chain
4146d2d67   Hugh Dickins   ksm: make !merge_...
147
   * @list: linked into migrate_nodes, pending placement in the proper node tree
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
148
   * @hlist: hlist head of rmap_items using this ksm page
4146d2d67   Hugh Dickins   ksm: make !merge_...
149
   * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
150
151
   * @chain_prune_time: time of the last full garbage collection
   * @rmap_hlist_len: number of rmap_item entries in hlist or STABLE_NODE_CHAIN
4146d2d67   Hugh Dickins   ksm: make !merge_...
152
   * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
153
154
   */
  struct stable_node {
4146d2d67   Hugh Dickins   ksm: make !merge_...
155
156
157
158
  	/* Tree linkage and list linkage overlay each other. */
  	union {
  		struct rb_node node;	/* when node of stable tree */
  		struct {		/* when listed for migration */
  			/* STABLE_NODE_DUP_HEAD while linked as a chain dup */
  			struct list_head *head;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
159
160
161
162
  			struct {
  				/* link in the owning chain's hlist */
  				struct hlist_node hlist_dup;
  				/* link on migrate_nodes */
  				struct list_head list;
  			};
4146d2d67   Hugh Dickins   ksm: make !merge_...
163
164
  		};
  	};
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
165
  	/* rmap_items using this ksm page; on a chain head, its dups */
  	struct hlist_head hlist;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
166
167
168
169
170
171
172
173
174
175
176
  	union {
  		unsigned long kpfn;		/* pfn of this ksm page */
  		unsigned long chain_prune_time;	/* chain head: jiffies timestamp */
  	};
  	/*
  	 * STABLE_NODE_CHAIN can be any negative number in
  	 * rmap_hlist_len negative range, but better not -1 to be able
  	 * to reliably detect underflows.
  	 */
  #define STABLE_NODE_CHAIN -1024
  	/* equals STABLE_NODE_CHAIN on a chain head (is_stable_node_chain()) */
  	int rmap_hlist_len;
4146d2d67   Hugh Dickins   ksm: make !merge_...
177
178
179
  #ifdef CONFIG_NUMA
  	int nid;	/* stable tree in which this node is linked */
  #endif
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
180
181
182
  };
  
  /**
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
183
   * struct rmap_item - reverse mapping item for virtual addresses
6514d511d   Hugh Dickins   ksm: singly-linke...
184
   * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
db114b83a   Hugh Dickins   ksm: hold anon_vm...
185
   * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
186
   * @nid: NUMA node id of unstable tree in which linked (may not match page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
187
188
189
   * @mm: the memory structure this rmap_item is pointing into
   * @address: the virtual address this rmap_item tracks (+ flags in low bits)
   * @oldchecksum: previous checksum of the page at that virtual address
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
190
191
192
   * @node: rb node of this rmap_item in the unstable tree
   * @head: pointer to stable_node heading this list in the stable tree
   * @hlist: link into hlist of rmap_items hanging off that stable_node
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
193
194
   */
  struct rmap_item {
6514d511d   Hugh Dickins   ksm: singly-linke...
195
  	/* next rmap_item on the owning mm_slot's singly-linked list */
  	struct rmap_item *rmap_list;
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
196
197
198
199
200
201
  	union {
  		struct anon_vma *anon_vma;	/* when stable */
  #ifdef CONFIG_NUMA
  		int nid;		/* when node of unstable tree */
  #endif
  	};
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
202
203
  	/* the mm that the tracked address belongs to */
  	struct mm_struct *mm;
  	unsigned long address;		/* + low bits used for flags below */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
204
  	unsigned int oldchecksum;	/* when unstable */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
205
  	/* Unstable-tree linkage and stable-node linkage overlay each other. */
  	union {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
206
207
208
209
210
  		struct rb_node node;	/* when node of unstable tree */
  		struct {		/* when listed from stable tree */
  			/* stable_node whose hlist this item hangs off */
  			struct stable_node *head;
  			/* link in that stable_node's hlist */
  			struct hlist_node hlist;
  		};
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
211
212
213
214
  	};
  };
  
  #define SEQNR_MASK	0x0ff	/* low bits of unstable tree seqnr */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
215
216
  #define UNSTABLE_FLAG	0x100	/* is a node of the unstable tree */
  #define STABLE_FLAG	0x200	/* is listed from the stable tree */
1105a2fc0   Jia He   mm/ksm.c: ignore ...
217
218
  #define KSM_FLAG_MASK	(SEQNR_MASK|UNSTABLE_FLAG|STABLE_FLAG)
  				/* to mask all the flags */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
219
220
  
  /* The stable and unstable tree heads */
ef53d16cd   Hugh Dickins   ksm: allocate roo...
221
222
223
224
  static struct rb_root one_stable_tree[1] = { RB_ROOT };
  static struct rb_root one_unstable_tree[1] = { RB_ROOT };
  static struct rb_root *root_stable_tree = one_stable_tree;
  static struct rb_root *root_unstable_tree = one_unstable_tree;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
225

4146d2d67   Hugh Dickins   ksm: make !merge_...
226
227
  /* Recently migrated nodes of stable tree, pending proper placement */
  static LIST_HEAD(migrate_nodes);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
228
  #define STABLE_NODE_DUP_HEAD ((struct list_head *)&migrate_nodes.prev)
4146d2d67   Hugh Dickins   ksm: make !merge_...
229

4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
230
231
  #define MM_SLOTS_HASH_BITS 10
  static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
232
233
234
235
236
237
238
239
240
  
  static struct mm_slot ksm_mm_head = {
  	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
  };
  static struct ksm_scan ksm_scan = {
  	.mm_slot = &ksm_mm_head,
  };
  
  static struct kmem_cache *rmap_item_cache;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
241
  static struct kmem_cache *stable_node_cache;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
242
243
244
  static struct kmem_cache *mm_slot_cache;
  
  /* The number of nodes in the stable tree */
b40282603   Hugh Dickins   ksm: rename kerne...
245
  static unsigned long ksm_pages_shared;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
246

e178dfde3   Hugh Dickins   ksm: move pages_s...
247
  /* The number of page slots additionally sharing those nodes */
b40282603   Hugh Dickins   ksm: rename kerne...
248
  static unsigned long ksm_pages_sharing;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
249

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
250
251
252
253
254
  /* The number of nodes in the unstable tree */
  static unsigned long ksm_pages_unshared;
  
  /* The number of rmap_items in use: to calculate pages_volatile */
  static unsigned long ksm_rmap_items;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
255
256
257
258
259
260
261
262
263
264
265
  /* The number of stable_node chains */
  static unsigned long ksm_stable_node_chains;
  
  /* The number of stable_node dups linked to the stable_node chains */
  static unsigned long ksm_stable_node_dups;
  
  /* Delay in pruning stale stable_node_dups in the stable_node_chains */
  static int ksm_stable_node_chains_prune_millisecs = 2000;
  
  /* Maximum number of page slots sharing a stable node */
  static int ksm_max_page_sharing = 256;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
266
  /* Number of pages ksmd should scan in one batch */
2c6854fda   Izik Eidus   ksm: change defau...
267
  static unsigned int ksm_thread_pages_to_scan = 100;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
268
269
  
  /* Milliseconds ksmd should sleep between batches */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
270
  static unsigned int ksm_thread_sleep_millisecs = 20;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
271

e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
272
273
274
275
276
  /* Checksum of an empty (zeroed) page */
  static unsigned int zero_checksum __read_mostly;
  
  /* Whether to merge empty (zeroed) pages with actual zero pages */
  static bool ksm_use_zero_pages __read_mostly;
e850dcf53   Hugh Dickins   ksm: trivial tidyups
277
  #ifdef CONFIG_NUMA
90bd6fd31   Petr Holasek   ksm: allow trees ...
278
279
  /* Zeroed when merging across nodes is not allowed */
  static unsigned int ksm_merge_across_nodes = 1;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
280
  static int ksm_nr_node_ids = 1;
e850dcf53   Hugh Dickins   ksm: trivial tidyups
281
282
  #else
  #define ksm_merge_across_nodes	1U
ef53d16cd   Hugh Dickins   ksm: allocate roo...
283
  #define ksm_nr_node_ids		1
e850dcf53   Hugh Dickins   ksm: trivial tidyups
284
  #endif
90bd6fd31   Petr Holasek   ksm: allow trees ...
285

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
286
287
288
  #define KSM_RUN_STOP	0
  #define KSM_RUN_MERGE	1
  #define KSM_RUN_UNMERGE	2
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
289
290
291
  #define KSM_RUN_OFFLINE	4
  static unsigned long ksm_run = KSM_RUN_STOP;
  static void wait_while_offlining(void);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
292
293
  
  static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
fcf9a0ef8   Kirill Tkhai   ksm: react on cha...
294
  static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
295
296
297
298
299
300
301
302
303
304
305
306
  static DEFINE_MUTEX(ksm_thread_mutex);
  static DEFINE_SPINLOCK(ksm_mmlist_lock);
  
  /*
   * KSM_KMEM_CACHE(__struct, __flags) creates a slab cache named
   * "ksm_<__struct>" whose object size and alignment are those of
   * struct __struct; __flags is passed through and no constructor is used.
   */
  #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
  		sizeof(struct __struct), __alignof__(struct __struct),\
  		(__flags), NULL)
  
  /*
   * Create the three slab caches ksm allocates from: rmap_item, stable_node
   * and mm_slot, in that order.
   *
   * Returns 0 on success.  If any cache cannot be created, the caches that
   * were already set up are destroyed again and -ENOMEM is returned.
   */
  static int __init ksm_slab_init(void)
  {
  	rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
  	if (rmap_item_cache == NULL)
  		return -ENOMEM;

  	stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
  	if (stable_node_cache == NULL) {
  		kmem_cache_destroy(rmap_item_cache);
  		return -ENOMEM;
  	}

  	mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
  	if (mm_slot_cache == NULL) {
  		/* Unwind in reverse order of creation. */
  		kmem_cache_destroy(stable_node_cache);
  		kmem_cache_destroy(rmap_item_cache);
  		return -ENOMEM;
  	}

  	return 0;
  }
  
  /*
   * Destroy all three ksm slab caches.  Only mm_slot_cache is reset to NULL
   * afterwards; alloc_mm_slot() tests that pointer and returns NULL once it
   * has been cleared.
   */
  static void __init ksm_slab_free(void)
  {
  	struct kmem_cache *const doomed[] = {
  		mm_slot_cache,
  		stable_node_cache,
  		rmap_item_cache,
  	};
  	unsigned int i;

  	for (i = 0; i < sizeof(doomed) / sizeof(doomed[0]); i++)
  		kmem_cache_destroy(doomed[i]);

  	mm_slot_cache = NULL;
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
  /*
   * A stable_node is a chain head exactly when its rmap_hlist_len holds the
   * STABLE_NODE_CHAIN sentinel.
   */
  static __always_inline bool is_stable_node_chain(struct stable_node *chain)
  {
  	const int len = chain->rmap_hlist_len;

  	return len == STABLE_NODE_CHAIN;
  }
  
  /*
   * A stable_node is a dup (linked under a chain) exactly when its head
   * pointer carries the STABLE_NODE_DUP_HEAD marker.
   */
  static __always_inline bool is_stable_node_dup(struct stable_node *dup)
  {
  	struct list_head *marker = dup->head;

  	return marker == STABLE_NODE_DUP_HEAD;
  }
  
  /*
   * Link @dup under @chain: tag @dup with STABLE_NODE_DUP_HEAD, push it onto
   * the front of @chain's hlist and account for it in ksm_stable_node_dups.
   * @dup must not already be a dup, and @chain must be a chain head.
   */
  static inline void stable_node_chain_add_dup(struct stable_node *dup,
  					     struct stable_node *chain)
  {
  	struct hlist_head *dups = &chain->hlist;

  	VM_BUG_ON(is_stable_node_dup(dup));
  	dup->head = STABLE_NODE_DUP_HEAD;

  	VM_BUG_ON(!is_stable_node_chain(chain));
  	hlist_add_head(&dup->hlist_dup, dups);

  	ksm_stable_node_dups += 1;
  }
  
  static inline void __stable_node_dup_del(struct stable_node *dup)
  {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
352
  	VM_BUG_ON(!is_stable_node_dup(dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
  	hlist_del(&dup->hlist_dup);
  	ksm_stable_node_dups--;
  }
  
  /*
   * Remove @dup from wherever it is linked: from its chain's hlist if it is
   * a dup, otherwise from the stable rbtree selected by its nid.  @dup must
   * not be a chain head.  On CONFIG_DEBUG_VM builds the head pointer is
   * cleared afterwards.
   */
  static inline void stable_node_dup_del(struct stable_node *dup)
  {
  	VM_BUG_ON(is_stable_node_chain(dup));

  	if (!is_stable_node_dup(dup)) {
  		/* Not under a chain: it sits directly in the rbtree. */
  		rb_erase(&dup->node, &root_stable_tree[NUMA(dup->nid)]);
  	} else {
  		__stable_node_dup_del(dup);
  	}
  #ifdef CONFIG_DEBUG_VM
  	dup->head = NULL;
  #endif
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
368
369
  static inline struct rmap_item *alloc_rmap_item(void)
  {
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
370
  	struct rmap_item *rmap_item;
5b398e416   zhong jiang   mm,ksm: fix endle...
371
372
  	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
  						__GFP_NORETRY | __GFP_NOWARN);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
373
374
375
  	if (rmap_item)
  		ksm_rmap_items++;
  	return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
376
377
378
379
  }
  
  static inline void free_rmap_item(struct rmap_item *rmap_item)
  {
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
380
  	ksm_rmap_items--;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
381
382
383
  	rmap_item->mm = NULL;	/* debug safety */
  	kmem_cache_free(rmap_item_cache, rmap_item);
  }
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
384
385
  static inline struct stable_node *alloc_stable_node(void)
  {
6213055f2   zhong jiang   mm,ksm: add __GFP...
386
387
388
389
390
391
  	/*
  	 * The allocation can take too long with GFP_KERNEL when memory is under
  	 * pressure, which may lead to hung task warnings.  Adding __GFP_HIGH
  	 * grants access to memory reserves, helping to avoid this problem.
  	 */
  	return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
392
393
394
395
  }
  
  static inline void free_stable_node(struct stable_node *stable_node)
  {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
396
397
  	VM_BUG_ON(stable_node->rmap_hlist_len &&
  		  !is_stable_node_chain(stable_node));
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
398
399
  	kmem_cache_free(stable_node_cache, stable_node);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
400
401
402
403
404
405
406
407
408
409
410
  /*
   * Allocate a zeroed mm_slot with GFP_KERNEL.  Returns NULL if the
   * allocation fails or if mm_slot_cache is not available.
   */
  static inline struct mm_slot *alloc_mm_slot(void)
  {
  	if (mm_slot_cache)
  		return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);

  	/* initialization failed */
  	return NULL;
  }
  
  /* Return @mm_slot to mm_slot_cache. */
  static inline void free_mm_slot(struct mm_slot *mm_slot)
  {
  	kmem_cache_free(mm_slot_cache, mm_slot);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
411
412
  static struct mm_slot *get_mm_slot(struct mm_struct *mm)
  {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
413
  	struct mm_slot *slot;
b67bfe0d4   Sasha Levin   hlist: drop the n...
414
  	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
415
416
  		if (slot->mm == mm)
  			return slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
417

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
418
419
420
421
422
423
  	return NULL;
  }
  
  static void insert_to_mm_slots_hash(struct mm_struct *mm,
  				    struct mm_slot *mm_slot)
  {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
424
  	mm_slot->mm = mm;
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
425
  	hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
426
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
427
  /*
a913e182a   Hugh Dickins   ksm: clean up obs...
428
429
430
431
432
433
434
435
436
437
438
439
440
   * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
   * page tables after it has passed through ksm_exit() - which, if necessary,
   * takes mmap_sem briefly to serialize against them.  ksm_exit() does not set
   * a special flag: they can just back out as soon as mm_users goes to zero.
   * ksm_test_exit() is used throughout to make this test for exit: in some
   * places for correctness, in some places just to avoid unnecessary work.
   */
  static inline bool ksm_test_exit(struct mm_struct *mm)
  {
  	/* The mm counts as exited once its mm_users count has reached zero. */
  	return !atomic_read(&mm->mm_users);
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
441
442
   * We use break_ksm to break COW on a ksm page: it's a stripped down
   *
d4edcf0d5   Dave Hansen   mm/gup: Switch al...
443
   *	if (get_user_pages(addr, 1, 1, 1, &page, NULL) == 1)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
444
445
446
447
448
449
   *		put_page(page);
   *
   * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
   * in case the application has unmapped and remapped mm,addr meanwhile.
   * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
   * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
1b2ee1266   Dave Hansen   mm/core: Do not e...
450
451
452
453
   *
   * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
   * of the process that owns 'vma'.  We also do not want to enforce
   * protection keys here anyway.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
454
   */
d952b7913   Hugh Dickins   ksm: fix endless ...
455
  static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
456
457
  {
  	struct page *page;
50a7ca3c6   Souptick Joarder   mm: convert retur...
458
  	vm_fault_t ret = 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
459
460
461
  
  	do {
  		cond_resched();
1b2ee1266   Dave Hansen   mm/core: Do not e...
462
463
  		page = follow_page(vma, addr,
  				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
22eccdd7d   Dan Carpenter   ksm: check for ER...
464
  		if (IS_ERR_OR_NULL(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
465
466
  			break;
  		if (PageKsm(page))
dcddffd41   Kirill A. Shutemov   mm: do not pass m...
467
468
  			ret = handle_mm_fault(vma, addr,
  					FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
469
470
471
  		else
  			ret = VM_FAULT_WRITE;
  		put_page(page);
33692f275   Linus Torvalds   vm: add VM_FAULT_...
472
  	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
d952b7913   Hugh Dickins   ksm: fix endless ...
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
  	/*
  	 * We must loop because handle_mm_fault() may back out if there's
  	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
  	 *
  	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
  	 * COW has been broken, even if the vma does not permit VM_WRITE;
  	 * but note that a concurrent fault might break PageKsm for us.
  	 *
  	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
  	 * backing file, which also invalidates anonymous pages: that's
  	 * okay, that truncation will have unmapped the PageKsm for us.
  	 *
  	 * VM_FAULT_OOM: at the time of writing (late July 2009), setting
  	 * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the
  	 * current task has TIF_MEMDIE set, and will be OOM killed on return
  	 * to user; and ksmd, having no mm, would never be chosen for that.
  	 *
  	 * But if the mm is in a limited mem_cgroup, then the fault may fail
  	 * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and
  	 * even ksmd can fail in this way - though it's usually breaking ksm
  	 * just to undo a merge it made a moment before, so unlikely to oom.
  	 *
  	 * That's a pity: we might therefore have more kernel pages allocated
  	 * than we're counting as nodes in the stable tree; but ksm_do_scan
  	 * will retry to break_cow on each pass, so should recover the page
  	 * in due course.  The important thing is to not let VM_MERGEABLE
  	 * be cleared while any such pages might remain in the area.
  	 */
  	return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
502
  }
ef6942224   Bob Liu   ksm: cleanup: int...
503
504
505
506
507
508
509
510
511
512
513
514
515
  /*
   * Find the vma of @mm that contains @addr, provided it is VM_MERGEABLE
   * and has an anon_vma.  Returns NULL if there is no such vma, or if the
   * mm is already exiting (ksm_test_exit()).
   */
  static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
  		unsigned long addr)
  {
  	struct vm_area_struct *vma = NULL;

  	if (!ksm_test_exit(mm))
  		vma = find_vma(mm, addr);

  	/* find_vma() may return a vma that starts above addr: reject it. */
  	if (vma && vma->vm_start <= addr &&
  	    (vma->vm_flags & VM_MERGEABLE) && vma->anon_vma)
  		return vma;

  	return NULL;
  }
8dd3557a5   Hugh Dickins   ksm: cleanup some...
516
  static void break_cow(struct rmap_item *rmap_item)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
517
  {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
518
519
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
520
  	struct vm_area_struct *vma;
4035c07a8   Hugh Dickins   ksm: take keyhole...
521
522
523
524
  	/*
  	 * It is not an accident that whenever we want to break COW
  	 * to undo, we also need to drop a reference to the anon_vma.
  	 */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
525
  	put_anon_vma(rmap_item->anon_vma);
4035c07a8   Hugh Dickins   ksm: take keyhole...
526

81464e306   Hugh Dickins   ksm: five little ...
527
  	down_read(&mm->mmap_sem);
ef6942224   Bob Liu   ksm: cleanup: int...
528
529
530
  	vma = find_mergeable_vma(mm, addr);
  	if (vma)
  		break_ksm(vma, addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
531
532
533
534
535
536
537
538
539
540
541
  	up_read(&mm->mmap_sem);
  }
  
  static struct page *get_mergeable_page(struct rmap_item *rmap_item)
  {
  	struct mm_struct *mm = rmap_item->mm;
  	unsigned long addr = rmap_item->address;
  	struct vm_area_struct *vma;
  	struct page *page;
  
  	down_read(&mm->mmap_sem);
ef6942224   Bob Liu   ksm: cleanup: int...
542
543
  	vma = find_mergeable_vma(mm, addr);
  	if (!vma)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
544
545
546
  		goto out;
  
  	page = follow_page(vma, addr, FOLL_GET);
22eccdd7d   Dan Carpenter   ksm: check for ER...
547
  	if (IS_ERR_OR_NULL(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
548
  		goto out;
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
549
  	if (PageAnon(page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
550
551
552
553
  		flush_anon_page(vma, page, addr);
  		flush_dcache_page(page);
  	} else {
  		put_page(page);
c8f95ed1a   Andrea Arcangeli   ksm: unstable_tre...
554
555
  out:
  		page = NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
556
557
558
559
  	}
  	up_read(&mm->mmap_sem);
  	return page;
  }
90bd6fd31   Petr Holasek   ksm: allow trees ...
560
561
562
563
564
565
566
567
  /*
   * This helper is used to get the right index into the arrays of tree roots.
   * When the merge_across_nodes knob is set to 1, there are only two rb-trees,
   * one stable and one unstable, holding pages from all nodes, with their roots
   * at index 0.  Otherwise, every node has its own stable and unstable tree.
   */
  static inline int get_kpfn_nid(unsigned long kpfn)
  {
d8fc16a82   Hugh Dickins   ksm: fix m68k bui...
568
  	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
90bd6fd31   Petr Holasek   ksm: allow trees ...
569
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
570
571
572
573
574
575
576
577
578
579
  static struct stable_node *alloc_stable_node_chain(struct stable_node *dup,
  						   struct rb_root *root)
  {
  	struct stable_node *chain = alloc_stable_node();
  	VM_BUG_ON(is_stable_node_chain(dup));
  	if (likely(chain)) {
  		INIT_HLIST_HEAD(&chain->hlist);
  		chain->chain_prune_time = jiffies;
  		chain->rmap_hlist_len = STABLE_NODE_CHAIN;
  #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA)
98fa15f34   Anshuman Khandual   mm: replace all o...
580
  		chain->nid = NUMA_NO_NODE; /* debug */
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
  #endif
  		ksm_stable_node_chains++;
  
  		/*
  		 * Put the stable node chain in the first dimension of
  		 * the stable tree and at the same time remove the old
  		 * stable node.
  		 */
  		rb_replace_node(&dup->node, &chain->node, root);
  
  		/*
  		 * Move the old stable node to the second dimension
  		 * queued in the hlist_dup. The invariant is that all
  		 * dup stable_nodes in the chain->hlist point to pages
  		 * that are wrprotected and have the exact same
  		 * content.
  		 */
  		stable_node_chain_add_dup(dup, chain);
  	}
  	return chain;
  }
  
  /*
   * Unlink the chain head @chain from the rb-tree @root, free it, and
   * decrement the ksm_stable_node_chains counter.
   */
  static inline void free_stable_node_chain(struct stable_node *chain,
  					  struct rb_root *root)
  {
  	rb_erase(&chain->node, root);
  	free_stable_node(chain);
  	ksm_stable_node_chains--;
  }
4035c07a8   Hugh Dickins   ksm: take keyhole...
610
611
612
  static void remove_node_from_stable_tree(struct stable_node *stable_node)
  {
  	struct rmap_item *rmap_item;
4035c07a8   Hugh Dickins   ksm: take keyhole...
613

2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
614
615
  	/* check it's not STABLE_NODE_CHAIN or negative */
  	BUG_ON(stable_node->rmap_hlist_len < 0);
b67bfe0d4   Sasha Levin   hlist: drop the n...
616
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
617
618
619
620
  		if (rmap_item->hlist.next)
  			ksm_pages_sharing--;
  		else
  			ksm_pages_shared--;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
621
622
  		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
  		stable_node->rmap_hlist_len--;
9e60109f1   Peter Zijlstra   mm: rename drop_a...
623
  		put_anon_vma(rmap_item->anon_vma);
4035c07a8   Hugh Dickins   ksm: take keyhole...
624
625
626
  		rmap_item->address &= PAGE_MASK;
  		cond_resched();
  	}
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
627
628
629
630
631
  	/*
  	 * We need the second aligned pointer of the migrate_nodes
  	 * list_head to stay clear from the rb_parent_color union
  	 * (aligned and different than any node) and also different
  	 * from &migrate_nodes. This will verify that future list.h changes
815f0ddb3   Nick Desaulniers   include/linux/com...
632
  	 * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it.
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
633
  	 */
815f0ddb3   Nick Desaulniers   include/linux/com...
634
  #if defined(GCC_VERSION) && GCC_VERSION >= 40903
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
635
636
637
  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
  	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
  #endif
4146d2d67   Hugh Dickins   ksm: make !merge_...
638
639
640
  	if (stable_node->head == &migrate_nodes)
  		list_del(&stable_node->list);
  	else
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
641
  		stable_node_dup_del(stable_node);
4035c07a8   Hugh Dickins   ksm: take keyhole...
642
643
  	free_stable_node(stable_node);
  }
2cee57d1b   Yang Shi   mm: ksm: do not b...
644
645
646
647
648
  /* Locking mode requested from get_ksm_page(). */
  enum get_ksm_page_flags {
  	/* Return the page with a reference held but without locking it. */
  	GET_KSM_PAGE_NOLOCK,
  	/* Take the page lock with lock_page() before returning the page. */
  	GET_KSM_PAGE_LOCK,
  	/* Use trylock_page(); get_ksm_page() returns ERR_PTR(-EBUSY) if it fails. */
  	GET_KSM_PAGE_TRYLOCK
  };
4035c07a8   Hugh Dickins   ksm: take keyhole...
649
650
651
652
653
654
  /*
   * get_ksm_page: checks if the page indicated by the stable node
   * is still its ksm page, despite having held no reference to it.
   * In which case we can trust the content of the page, and it
   * returns the gotten page; but if the page has now been zapped,
   * remove the stale node from the stable tree and return NULL.
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
655
   * But beware, the stable node's page might be being migrated.
4035c07a8   Hugh Dickins   ksm: take keyhole...
656
657
658
659
660
661
662
663
664
665
   *
   * You would expect the stable_node to hold a reference to the ksm page.
   * But if it increments the page's count, swapping out has to wait for
   * ksmd to come around again before it can free the page, which may take
   * seconds or even minutes: much too unresponsive.  So instead we use a
   * "keyhole reference": access to the ksm page from the stable node peeps
   * out through its keyhole to see if that page still holds the right key,
   * pointing back to this stable node.  This relies on freeing a PageAnon
   * page to reset its page->mapping to NULL, and relies on no other use of
   * a page to put something that might look like our key in page->mapping.
4035c07a8   Hugh Dickins   ksm: take keyhole...
666
667
   *
   * Note that the page may be on its way to being freed when it is examined
   * here; that is an anomaly to bear in mind.
   */
2cee57d1b   Yang Shi   mm: ksm: do not b...
668
669
  static struct page *get_ksm_page(struct stable_node *stable_node,
  				 enum get_ksm_page_flags flags)
4035c07a8   Hugh Dickins   ksm: take keyhole...
670
671
672
  {
  	struct page *page;
  	void *expected_mapping;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
673
  	unsigned long kpfn;
4035c07a8   Hugh Dickins   ksm: take keyhole...
674

bda807d44   Minchan Kim   mm: migrate: supp...
675
676
  	expected_mapping = (void *)((unsigned long)stable_node |
  					PAGE_MAPPING_KSM);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
677
  again:
08df47743   Paul E. McKenney   mm/ksm: Remove no...
678
  	kpfn = READ_ONCE(stable_node->kpfn); /* Address dependency. */
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
679
  	page = pfn_to_page(kpfn);
4db0c3c29   Jason Low   mm: remove rest o...
680
  	if (READ_ONCE(page->mapping) != expected_mapping)
4035c07a8   Hugh Dickins   ksm: take keyhole...
681
  		goto stale;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
682
683
684
685
686
  
  	/*
  	 * We cannot do anything with the page while its refcount is 0.
  	 * Usually 0 means free, or tail of a higher-order page: in which
  	 * case this node is no longer referenced, and should be freed;
1c4c3b99c   Jiang Biao   mm: fix page_free...
687
  	 * however, it might mean that the page is under page_ref_freeze().
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
688
  	 * The __remove_mapping() case is easy, again the node is now stale;
52d1e606e   Kirill Tkhai   mm: reuse only-pt...
689
690
691
  	 * the same is in reuse_ksm_page() case; but if page is swapcache
  	 * in migrate_page_move_mapping(), it might still be our page,
  	 * in which case it's essential to keep the node.
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
692
693
694
695
696
697
698
  	 */
  	while (!get_page_unless_zero(page)) {
  		/*
  		 * Another check for page->mapping != expected_mapping would
  		 * work here too.  We have chosen the !PageSwapCache test to
  		 * optimize the common case, when the page is or is about to
  		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
1c4c3b99c   Jiang Biao   mm: fix page_free...
699
  		 * in the ref_freeze section of __remove_mapping(); but Anon
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
700
701
702
703
704
705
  		 * page->mapping reset to NULL later, in free_pages_prepare().
  		 */
  		if (!PageSwapCache(page))
  			goto stale;
  		cpu_relax();
  	}
4db0c3c29   Jason Low   mm: remove rest o...
706
  	if (READ_ONCE(page->mapping) != expected_mapping) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
707
708
709
  		put_page(page);
  		goto stale;
  	}
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
710

2cee57d1b   Yang Shi   mm: ksm: do not b...
711
712
713
714
715
716
  	if (flags == GET_KSM_PAGE_TRYLOCK) {
  		if (!trylock_page(page)) {
  			put_page(page);
  			return ERR_PTR(-EBUSY);
  		}
  	} else if (flags == GET_KSM_PAGE_LOCK)
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
717
  		lock_page(page);
2cee57d1b   Yang Shi   mm: ksm: do not b...
718
719
  
  	if (flags != GET_KSM_PAGE_NOLOCK) {
4db0c3c29   Jason Low   mm: remove rest o...
720
  		if (READ_ONCE(page->mapping) != expected_mapping) {
8aafa6a48   Hugh Dickins   ksm: get_ksm_page...
721
722
723
724
725
  			unlock_page(page);
  			put_page(page);
  			goto stale;
  		}
  	}
4035c07a8   Hugh Dickins   ksm: take keyhole...
726
  	return page;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
727

4035c07a8   Hugh Dickins   ksm: take keyhole...
728
  stale:
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
729
730
731
732
733
734
735
  	/*
  	 * We come here from above when page->mapping or !PageSwapCache
  	 * suggests that the node is stale; but it might be under migration.
  	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
  	 * before checking whether node->kpfn has been changed.
  	 */
  	smp_rmb();
4db0c3c29   Jason Low   mm: remove rest o...
736
  	if (READ_ONCE(stable_node->kpfn) != kpfn)
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
737
  		goto again;
4035c07a8   Hugh Dickins   ksm: take keyhole...
738
739
740
  	remove_node_from_stable_tree(stable_node);
  	return NULL;
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
741
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
742
743
744
745
746
   * Removing rmap_item from stable or unstable tree.
   * This function will clean the information from the stable/unstable tree.
   */
  static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
  {
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
747
748
  	if (rmap_item->address & STABLE_FLAG) {
  		struct stable_node *stable_node;
5ad646880   Hugh Dickins   ksm: let shared p...
749
  		struct page *page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
750

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
751
  		stable_node = rmap_item->head;
2cee57d1b   Yang Shi   mm: ksm: do not b...
752
  		page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
4035c07a8   Hugh Dickins   ksm: take keyhole...
753
754
  		if (!page)
  			goto out;
5ad646880   Hugh Dickins   ksm: let shared p...
755

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
756
  		hlist_del(&rmap_item->hlist);
4035c07a8   Hugh Dickins   ksm: take keyhole...
757
758
  		unlock_page(page);
  		put_page(page);
08beca44d   Hugh Dickins   ksm: stable_node ...
759

98666f8a2   Andrea Arcangeli   ksm: use the help...
760
  		if (!hlist_empty(&stable_node->hlist))
4035c07a8   Hugh Dickins   ksm: take keyhole...
761
762
  			ksm_pages_sharing--;
  		else
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
763
  			ksm_pages_shared--;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
764
765
  		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
  		stable_node->rmap_hlist_len--;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
766

9e60109f1   Peter Zijlstra   mm: rename drop_a...
767
  		put_anon_vma(rmap_item->anon_vma);
93d17715a   Hugh Dickins   ksm: three remove...
768
  		rmap_item->address &= PAGE_MASK;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
769

7b6ba2c7d   Hugh Dickins   ksm: separate sta...
770
  	} else if (rmap_item->address & UNSTABLE_FLAG) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
771
772
  		unsigned char age;
  		/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
773
  		 * Usually ksmd can and must skip the rb_erase, because
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
774
  		 * root_unstable_tree was already reset to RB_ROOT.
9ba692948   Hugh Dickins   ksm: fix oom dead...
775
776
777
  		 * But be careful when an mm is exiting: do the rb_erase
  		 * if this rmap_item was inserted by this scan, rather
  		 * than left over from before.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
778
779
  		 */
  		age = (unsigned char)(ksm_scan.seqnr - rmap_item->address);
cd551f975   Hugh Dickins   ksm: distribute r...
780
  		BUG_ON(age > 1);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
781
  		if (!age)
90bd6fd31   Petr Holasek   ksm: allow trees ...
782
  			rb_erase(&rmap_item->node,
ef53d16cd   Hugh Dickins   ksm: allocate roo...
783
  				 root_unstable_tree + NUMA(rmap_item->nid));
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
784
  		ksm_pages_unshared--;
93d17715a   Hugh Dickins   ksm: three remove...
785
  		rmap_item->address &= PAGE_MASK;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
786
  	}
4035c07a8   Hugh Dickins   ksm: take keyhole...
787
  out:
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
788
789
  	cond_resched();		/* we're called from many long loops */
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
790
  static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
791
  				       struct rmap_item **rmap_list)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
792
  {
6514d511d   Hugh Dickins   ksm: singly-linke...
793
794
795
  	while (*rmap_list) {
  		struct rmap_item *rmap_item = *rmap_list;
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
796
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
797
798
799
800
801
  		free_rmap_item(rmap_item);
  	}
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
802
   * Though it's very tempting to unmerge rmap_items from stable tree rather
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
803
804
805
806
807
   * than check every pte of a given vma, the locking doesn't quite work for
   * that - an rmap_item is assigned to the stable tree after inserting ksm
   * page and upping mmap_sem.  Nor does it fit with the way we skip dup'ing
   * rmap_items from parent to child at fork time (so as not to waste time
   * if exit comes before the next scan reaches it).
81464e306   Hugh Dickins   ksm: five little ...
808
809
810
811
812
   *
   * Similarly, although we'd like to remove rmap_items (so updating counts
   * and freeing memory) when unmerging an area, it's easier to leave that
   * to the next pass of ksmd - consider, for example, how ksmd might be
   * in cmp_and_merge_page on one of the rmap_items we would be removing.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
813
   */
d952b7913   Hugh Dickins   ksm: fix endless ...
814
815
  static int unmerge_ksm_pages(struct vm_area_struct *vma,
  			     unsigned long start, unsigned long end)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
816
817
  {
  	unsigned long addr;
d952b7913   Hugh Dickins   ksm: fix endless ...
818
  	int err = 0;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
819

d952b7913   Hugh Dickins   ksm: fix endless ...
820
  	for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
821
822
  		if (ksm_test_exit(vma->vm_mm))
  			break;
d952b7913   Hugh Dickins   ksm: fix endless ...
823
824
825
826
827
828
  		if (signal_pending(current))
  			err = -ERESTARTSYS;
  		else
  			err = break_ksm(vma, addr);
  	}
  	return err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
829
  }
88484826b   Mike Rapoport   mm/ksm: move [set...
830
831
832
833
834
835
836
837
838
839
  /*
   * Return what page_rmapping() yields for @page if it is a KSM page,
   * otherwise NULL.
   */
  static inline struct stable_node *page_stable_node(struct page *page)
  {
  	if (!PageKsm(page))
  		return NULL;

  	return page_rmapping(page);
  }
  
  /*
   * Store @stable_node in page->mapping, with the PAGE_MAPPING_KSM bits
   * or'ed into the pointer value.
   */
  static inline void set_page_stable_node(struct page *page,
  					struct stable_node *stable_node)
  {
  	unsigned long tagged = (unsigned long)stable_node | PAGE_MAPPING_KSM;

  	page->mapping = (void *)tagged;
  }
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
840
841
842
843
  #ifdef CONFIG_SYSFS
  /*
   * Only called through the sysfs control interface:
   */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
844
845
846
847
  static int remove_stable_node(struct stable_node *stable_node)
  {
  	struct page *page;
  	int err;
2cee57d1b   Yang Shi   mm: ksm: do not b...
848
  	page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
cbf86cfe0   Hugh Dickins   ksm: remove old s...
849
850
851
852
853
854
  	if (!page) {
  		/*
  		 * get_ksm_page did remove_node_from_stable_tree itself.
  		 */
  		return 0;
  	}
8fdb3dbf0   Hugh Dickins   ksm: add some com...
855
856
857
858
859
  	if (WARN_ON_ONCE(page_mapped(page))) {
  		/*
  		 * This should not happen: but if it does, just refuse to let
  		 * merge_across_nodes be switched - there is no need to panic.
  		 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
860
  		err = -EBUSY;
8fdb3dbf0   Hugh Dickins   ksm: add some com...
861
  	} else {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
862
  		/*
8fdb3dbf0   Hugh Dickins   ksm: add some com...
863
864
865
  		 * The stable node did not yet appear stale to get_ksm_page(),
  		 * since that allows for an unmapped ksm page to be recognized
  		 * right up until it is freed; but the node is safe to remove.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
866
867
868
869
870
871
872
873
874
875
876
877
878
  		 * This page might be in a pagevec waiting to be freed,
  		 * or it might be PageSwapCache (perhaps under writeback),
  		 * or it might have been removed from swapcache a moment ago.
  		 */
  		set_page_stable_node(page, NULL);
  		remove_node_from_stable_tree(stable_node);
  		err = 0;
  	}
  
  	unlock_page(page);
  	put_page(page);
  	return err;
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
  /*
   * Remove @stable_node from @root.  A plain stable node is handed straight
   * to remove_stable_node(); for a chain, every dup on it is removed and
   * then the chain head itself is freed.
   *
   * Returns nonzero as soon as any remove_stable_node() call fails, and
   * zero once everything has been removed.
   */
  static int remove_stable_node_chain(struct stable_node *stable_node,
  				    struct rb_root *root)
  {
  	struct stable_node *dup;
  	struct hlist_node *tmp;

  	if (is_stable_node_chain(stable_node)) {
  		hlist_for_each_entry_safe(dup, tmp,
  					  &stable_node->hlist, hlist_dup) {
  			VM_BUG_ON(!is_stable_node_dup(dup));
  			if (remove_stable_node(dup))
  				return true;
  		}
  		/* Every dup must be gone before the chain head is freed. */
  		BUG_ON(!hlist_empty(&stable_node->hlist));
  		free_stable_node_chain(stable_node, root);
  		return false;
  	}

  	VM_BUG_ON(is_stable_node_dup(stable_node));
  	return remove_stable_node(stable_node) != 0;
  }
cbf86cfe0   Hugh Dickins   ksm: remove old s...
903
904
  static int remove_all_stable_nodes(void)
  {
036404183   Geliang Tang   mm/ksm.c: use lis...
905
  	struct stable_node *stable_node, *next;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
906
907
  	int nid;
  	int err = 0;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
908
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
909
910
911
  		while (root_stable_tree[nid].rb_node) {
  			stable_node = rb_entry(root_stable_tree[nid].rb_node,
  						struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
912
913
  			if (remove_stable_node_chain(stable_node,
  						     root_stable_tree + nid)) {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
914
915
916
917
918
919
  				err = -EBUSY;
  				break;	/* proceed to next nid */
  			}
  			cond_resched();
  		}
  	}
036404183   Geliang Tang   mm/ksm.c: use lis...
920
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
921
922
923
924
  		if (remove_stable_node(stable_node))
  			err = -EBUSY;
  		cond_resched();
  	}
cbf86cfe0   Hugh Dickins   ksm: remove old s...
925
926
  	return err;
  }
d952b7913   Hugh Dickins   ksm: fix endless ...
927
  static int unmerge_and_remove_all_rmap_items(void)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
928
929
930
931
  {
  	struct mm_slot *mm_slot;
  	struct mm_struct *mm;
  	struct vm_area_struct *vma;
d952b7913   Hugh Dickins   ksm: fix endless ...
932
933
934
  	int err = 0;
  
  	spin_lock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
935
  	ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
d952b7913   Hugh Dickins   ksm: fix endless ...
936
937
  						struct mm_slot, mm_list);
  	spin_unlock(&ksm_mmlist_lock);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
938

9ba692948   Hugh Dickins   ksm: fix oom dead...
939
940
  	for (mm_slot = ksm_scan.mm_slot;
  			mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
941
942
943
  		mm = mm_slot->mm;
  		down_read(&mm->mmap_sem);
  		for (vma = mm->mmap; vma; vma = vma->vm_next) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
944
945
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
946
947
  			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
  				continue;
d952b7913   Hugh Dickins   ksm: fix endless ...
948
949
  			err = unmerge_ksm_pages(vma,
  						vma->vm_start, vma->vm_end);
9ba692948   Hugh Dickins   ksm: fix oom dead...
950
951
  			if (err)
  				goto error;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
952
  		}
9ba692948   Hugh Dickins   ksm: fix oom dead...
953

6514d511d   Hugh Dickins   ksm: singly-linke...
954
  		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
7496fea9a   Zhou Chengming   ksm: fix conflict...
955
  		up_read(&mm->mmap_sem);
d952b7913   Hugh Dickins   ksm: fix endless ...
956
957
  
  		spin_lock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
958
  		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
d952b7913   Hugh Dickins   ksm: fix endless ...
959
  						struct mm_slot, mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
960
  		if (ksm_test_exit(mm)) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
961
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
962
963
964
965
966
  			list_del(&mm_slot->mm_list);
  			spin_unlock(&ksm_mmlist_lock);
  
  			free_mm_slot(mm_slot);
  			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
967
  			mmdrop(mm);
7496fea9a   Zhou Chengming   ksm: fix conflict...
968
  		} else
9ba692948   Hugh Dickins   ksm: fix oom dead...
969
  			spin_unlock(&ksm_mmlist_lock);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
970
  	}
cbf86cfe0   Hugh Dickins   ksm: remove old s...
971
972
  	/* Clean up stable nodes, but don't worry if some are still busy */
  	remove_all_stable_nodes();
d952b7913   Hugh Dickins   ksm: fix endless ...
973
  	ksm_scan.seqnr = 0;
9ba692948   Hugh Dickins   ksm: fix oom dead...
974
975
976
977
  	return 0;
  
  error:
  	up_read(&mm->mmap_sem);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
978
  	spin_lock(&ksm_mmlist_lock);
d952b7913   Hugh Dickins   ksm: fix endless ...
979
  	ksm_scan.mm_slot = &ksm_mm_head;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
980
  	spin_unlock(&ksm_mmlist_lock);
d952b7913   Hugh Dickins   ksm: fix endless ...
981
  	return err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
982
  }
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
983
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
984

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
985
986
987
  static u32 calc_checksum(struct page *page)
  {
  	u32 checksum;
9b04c5fec   Cong Wang   mm: remove the se...
988
  	void *addr = kmap_atomic(page);
59e1a2f4b   Timofey Titovets   ksm: replace jhas...
989
  	checksum = xxhash(addr, PAGE_SIZE, 0);
9b04c5fec   Cong Wang   mm: remove the se...
990
  	kunmap_atomic(addr);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
991
992
993
994
995
996
997
  	return checksum;
  }
  
  static int memcmp_pages(struct page *page1, struct page *page2)
  {
  	char *addr1, *addr2;
  	int ret;
9b04c5fec   Cong Wang   mm: remove the se...
998
999
  	addr1 = kmap_atomic(page1);
  	addr2 = kmap_atomic(page2);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1000
  	ret = memcmp(addr1, addr2, PAGE_SIZE);
9b04c5fec   Cong Wang   mm: remove the se...
1001
1002
  	kunmap_atomic(addr2);
  	kunmap_atomic(addr1);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
  	return ret;
  }
  
  /* Return 1 if memcmp_pages() finds no difference between the pages, else 0. */
  static inline int pages_identical(struct page *page1, struct page *page2)
  {
  	return memcmp_pages(page1, page2) == 0;
  }
  
  /*
   * Make the pte that maps @page in @vma write-protected and clean, then
   * store the resulting pte value in *@orig_pte.
   *
   * Returns 0 on success, -EFAULT otherwise (the page is not mapped in
   * @vma, or its reference count shows that someone else is using it).
   */
  static int write_protect_page(struct vm_area_struct *vma, struct page *page,
  			      pte_t *orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	struct page_vma_mapped_walk pvmw = {
  		.page = page,
  		.vma = vma,
  	};
  	struct mmu_notifier_range range;
  	int err = -EFAULT;

  	pvmw.address = page_address_in_vma(page, vma);
  	if (pvmw.address == -EFAULT)
  		return err;

  	BUG_ON(PageTransCompound(page));

  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
  				pvmw.address,
  				pvmw.address + PAGE_SIZE);
  	mmu_notifier_invalidate_range_start(&range);

  	if (!page_vma_mapped_walk(&pvmw))
  		goto out_mn;
  	if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
  		goto out_unlock;

  	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
  	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
  	    mm_tlb_flush_pending(mm)) {
  		int swapped = PageSwapCache(page);
  		pte_t entry;

  		flush_cache_page(vma, pvmw.address, page_to_pfn(page));
  		/*
  		 * This is tricky: get_user_pages_fast() runs without
  		 * taking any lock, so comparing the page count against
  		 * the mapcount is racy, and O_DIRECT could start right
  		 * after the check.  Clearing the pte and flushing the tlb
  		 * before the check assures us that no O_DIRECT can start
  		 * after the check or in the middle of it.
  		 *
  		 * No need to notify, as we are downgrading the page table
  		 * to read only, not changing it to point to a new page.
  		 *
  		 * See Documentation/vm/mmu_notifier.rst
  		 */
  		entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
  		/*
  		 * Check that no O_DIRECT or similar I/O is in progress on
  		 * the page; if it is, put the original pte back.
  		 */
  		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
  			set_pte_at(mm, pvmw.address, pvmw.pte, entry);
  			goto out_unlock;
  		}
  		if (pte_dirty(entry))
  			set_page_dirty(page);

  		if (pte_protnone(entry))
  			entry = pte_clear_savedwrite(entry);
  		else
  			entry = pte_wrprotect(entry);
  		entry = pte_mkclean(entry);
  		set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
  	}
  	*orig_pte = *pvmw.pte;
  	err = 0;

  out_unlock:
  	page_vma_mapped_walk_done(&pvmw);
  out_mn:
  	mmu_notifier_invalidate_range_end(&range);
  	return err;
  }
  
  /**
   * replace_page - replace page in vma by new ksm page
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1090
1091
1092
   * @vma:      vma that holds the pte pointing to page
   * @page:     the page we are replacing by kpage
   * @kpage:    the ksm page we replace page by
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1093
1094
1095
1096
   * @orig_pte: the original value of the pte
   *
   * Returns 0 on success, -EFAULT on failure.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1097
1098
  static int replace_page(struct vm_area_struct *vma, struct page *page,
  			struct page *kpage, pte_t orig_pte)
  {
  	struct mm_struct *mm = vma->vm_mm;
  	struct mmu_notifier_range range;
  	unsigned long addr;
  	spinlock_t *ptl;
  	pte_t newpte;
  	pte_t *ptep;
  	pmd_t *pmd;
  	int err = -EFAULT;

  	addr = page_address_in_vma(page, vma);
  	if (addr == -EFAULT)
  		return err;

  	pmd = mm_find_pmd(mm, addr);
  	if (!pmd)
  		return err;

  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
  				addr + PAGE_SIZE);
  	mmu_notifier_invalidate_range_start(&range);

  	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
  	/* Give up if the pte is no longer the one the caller saw. */
  	if (!pte_same(*ptep, orig_pte)) {
  		pte_unmap_unlock(ptep, ptl);
  		goto out_mn;
  	}

  	/*
  	 * No need to check ksm_use_zero_pages here: we can only have a
  	 * zero_page here if ksm_use_zero_pages was enabled already.
  	 */
  	if (is_zero_pfn(page_to_pfn(kpage))) {
  		newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
  					       vma->vm_page_prot));
  		/*
  		 * An anonymous page is being replaced by a zero page,
  		 * which is not anonymous.  The accounting has to be done
  		 * properly, otherwise we will get wrong values in /proc,
  		 * and a BUG message in dmesg when tearing down the mm.
  		 */
  		dec_mm_counter(mm, MM_ANONPAGES);
  	} else {
  		get_page(kpage);
  		page_add_anon_rmap(kpage, vma, addr, false);
  		newpte = mk_pte(kpage, vma->vm_page_prot);
  	}

  	flush_cache_page(vma, addr, pte_pfn(*ptep));
  	/*
  	 * No need to notify, as a read only page is being replaced by
  	 * another read only page with the same content.
  	 *
  	 * See Documentation/vm/mmu_notifier.rst
  	 */
  	ptep_clear_flush(vma, addr, ptep);
  	set_pte_at_notify(mm, addr, ptep, newpte);

  	page_remove_rmap(page, false);
  	if (!page_mapped(page))
  		try_to_free_swap(page);
  	put_page(page);

  	pte_unmap_unlock(ptep, ptl);
  	err = 0;
  out_mn:
  	mmu_notifier_invalidate_range_end(&range);
  	return err;
  }
  
  /*
   * try_to_merge_one_page - take two pages and merge them into one
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1170
1171
   * @vma: the vma that holds the pte pointing to page
   * @page: the PageAnon page that we want to replace with kpage
80e148226   Hugh Dickins   ksm: share anon p...
1172
1173
   * @kpage: the PageKsm page that we want to map instead of page,
   *         or NULL the first time when we want to use page as kpage.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1174
1175
1176
1177
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
   */
  static int try_to_merge_one_page(struct vm_area_struct *vma,
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1178
  				 struct page *page, struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1179
1180
1181
  {
  	pte_t orig_pte = __pte(0);
  	int err = -EFAULT;
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1182
1183
  	if (page == kpage)			/* ksm page forked */
  		return 0;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1184
  	if (!PageAnon(page))
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1185
  		goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1186
1187
1188
1189
1190
1191
1192
  	/*
  	 * We need the page lock to read a stable PageSwapCache in
  	 * write_protect_page().  We use trylock_page() instead of
  	 * lock_page() because we don't want to wait here - we
  	 * prefer to continue scanning and merging different pages,
  	 * then come back to this page when it is unlocked.
  	 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1193
  	if (!trylock_page(page))
31e855ea7   Hugh Dickins   ksm: remove redun...
1194
  		goto out;
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1195
1196
  
  	if (PageTransCompound(page)) {
a7306c343   Andrea Arcangeli   ksm: prevent cras...
1197
  		if (split_huge_page(page))
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1198
1199
  			goto out_unlock;
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1200
1201
1202
1203
1204
1205
  	/*
  	 * If this anonymous page is mapped only here, its pte may need
  	 * to be write-protected.  If it's mapped elsewhere, all of its
  	 * ptes are necessarily already write-protected.  But in either
  	 * case, we need to lock and check page_count is not raised.
  	 */
80e148226   Hugh Dickins   ksm: share anon p...
1206
1207
1208
1209
1210
1211
1212
1213
1214
  	if (write_protect_page(vma, page, &orig_pte) == 0) {
  		if (!kpage) {
  			/*
  			 * While we hold page lock, upgrade page from
  			 * PageAnon+anon_vma to PageKsm+NULL stable_node:
  			 * stable_tree_insert() will update stable_node.
  			 */
  			set_page_stable_node(page, NULL);
  			mark_page_accessed(page);
337ed7eb5   Minchan Kim   mm/ksm.c: mark st...
1215
1216
1217
1218
1219
1220
  			/*
  			 * Page reclaim just frees a clean page with no dirty
  			 * ptes: make sure that the ksm page would be swapped.
  			 */
  			if (!PageDirty(page))
  				SetPageDirty(page);
80e148226   Hugh Dickins   ksm: share anon p...
1221
1222
1223
1224
  			err = 0;
  		} else if (pages_identical(page, kpage))
  			err = replace_page(vma, page, kpage, orig_pte);
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1225

80e148226   Hugh Dickins   ksm: share anon p...
1226
  	if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
73848b468   Hugh Dickins   ksm: fix mlockfre...
1227
  		munlock_vma_page(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1228
1229
  		if (!PageMlocked(kpage)) {
  			unlock_page(page);
5ad646880   Hugh Dickins   ksm: let shared p...
1230
1231
1232
1233
1234
  			lock_page(kpage);
  			mlock_vma_page(kpage);
  			page = kpage;		/* for final unlock */
  		}
  	}
73848b468   Hugh Dickins   ksm: fix mlockfre...
1235

f765f5405   Kirill A. Shutemov   ksm: prepare to n...
1236
  out_unlock:
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1237
  	unlock_page(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1238
1239
1240
1241
1242
  out:
  	return err;
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1243
1244
   * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
   * but no new kernel page is allocated: kpage must already be a ksm page.
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1245
1246
   *
   * This function returns 0 if the pages were merged, -EFAULT otherwise.
81464e306   Hugh Dickins   ksm: five little ...
1247
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1248
1249
  static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
  				      struct page *page, struct page *kpage)
81464e306   Hugh Dickins   ksm: five little ...
1250
  {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1251
  	struct mm_struct *mm = rmap_item->mm;
81464e306   Hugh Dickins   ksm: five little ...
1252
1253
  	struct vm_area_struct *vma;
  	int err = -EFAULT;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1254
  	down_read(&mm->mmap_sem);
85c6e8dd2   Andrea Arcangeli   ksm: use find_mer...
1255
1256
  	vma = find_mergeable_vma(mm, rmap_item->address);
  	if (!vma)
81464e306   Hugh Dickins   ksm: five little ...
1257
  		goto out;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1258
  	err = try_to_merge_one_page(vma, page, kpage);
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1259
1260
  	if (err)
  		goto out;
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
1261
1262
  	/* Unstable nid is in union with stable anon_vma: remove first */
  	remove_rmap_item_from_tree(rmap_item);
db114b83a   Hugh Dickins   ksm: hold anon_vm...
1263
  	/* Must get reference to anon_vma while still holding mmap_sem */
9e60109f1   Peter Zijlstra   mm: rename drop_a...
1264
1265
  	rmap_item->anon_vma = vma->anon_vma;
  	get_anon_vma(vma->anon_vma);
81464e306   Hugh Dickins   ksm: five little ...
1266
  out:
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1267
  	up_read(&mm->mmap_sem);
81464e306   Hugh Dickins   ksm: five little ...
1268
1269
1270
1271
  	return err;
  }
  
  /*
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1272
1273
1274
   * try_to_merge_two_pages - take two identical pages and prepare them
   * to be merged into one page.
   *
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1275
1276
   * This function returns the kpage if we successfully merged two identical
   * pages into one ksm page, NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1277
   *
80e148226   Hugh Dickins   ksm: share anon p...
1278
   * Note that this function upgrades page to ksm page: if one of the pages
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1279
1280
   * is already a ksm page, try_to_merge_with_ksm_page should be used.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1281
1282
1283
1284
  static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  					   struct page *page,
  					   struct rmap_item *tree_rmap_item,
  					   struct page *tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1285
  {
80e148226   Hugh Dickins   ksm: share anon p...
1286
  	int err;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1287

80e148226   Hugh Dickins   ksm: share anon p...
1288
  	err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1289
  	if (!err) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1290
  		err = try_to_merge_with_ksm_page(tree_rmap_item,
80e148226   Hugh Dickins   ksm: share anon p...
1291
  							tree_page, page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1292
  		/*
81464e306   Hugh Dickins   ksm: five little ...
1293
1294
  		 * If that fails, we have a ksm page with only one pte
  		 * pointing to it: so break it.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1295
  		 */
4035c07a8   Hugh Dickins   ksm: take keyhole...
1296
  		if (err)
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1297
  			break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1298
  	}
80e148226   Hugh Dickins   ksm: share anon p...
1299
  	return err ? NULL : page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1300
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
  /*
   * Tell whether stable_node can take further sharers: it must still have
   * at least one mapping, and its rmap_hlist_len plus @offset must stay
   * below ksm_max_page_sharing.
   */
  static __always_inline
  bool __is_page_sharing_candidate(struct stable_node *stable_node, int offset)
  {
  	VM_BUG_ON(stable_node->rmap_hlist_len < 0);

  	/*
  	 * With no mapping left there is not much point in merging and
  	 * sharing with this stable_node: the underlying tree_page of the
  	 * other sharer is going to be freed soon.
  	 */
  	if (!stable_node->rmap_hlist_len)
  		return false;

  	return stable_node->rmap_hlist_len + offset < ksm_max_page_sharing;
  }
  
  /* Sharing-candidate test for stable_node with no extra offset applied. */
  static __always_inline
  bool is_page_sharing_candidate(struct stable_node *stable_node)
  {
  	const int no_offset = 0;

  	return __is_page_sharing_candidate(stable_node, no_offset);
  }
c01f0b54e   Colin Ian King   mm/ksm.c: make st...
1320
1321
1322
1323
  /*
   * Walk the dups hanging off the chain *_stable_node and select one that
   * is still a page sharing candidate.  When pruning, every dup is visited
   * and the candidate with the largest rmap_hlist_len wins; otherwise the
   * walk stops at the first candidate.
   *
   * The selected dup (or NULL) is stored in *_stable_node_dup and the page
   * obtained for it from get_ksm_page() is returned (NULL if none).  When
   * pruning finds a single live dup, the chain is freed, the dup replaces
   * it in the rbtree and *_stable_node is redirected to that dup.
   */
  static struct page *stable_node_dup(struct stable_node **_stable_node_dup,
  				    struct stable_node **_stable_node,
  				    struct rb_root *root,
  				    bool prune_stale_stable_nodes)
  {
  	struct stable_node *chain_head = *_stable_node;
  	struct stable_node *cur, *best = NULL;
  	struct hlist_node *next_safe;
  	struct page *cur_page, *best_page = NULL;
  	int best_len;
  	int live_dups = 0;

  	/* Prune only when asked to and once the prune interval elapsed. */
  	if (prune_stale_stable_nodes &&
  	    !time_before(jiffies, chain_head->chain_prune_time +
  			 msecs_to_jiffies(
  				ksm_stable_node_chains_prune_millisecs)))
  		chain_head->chain_prune_time = jiffies;
  	else
  		prune_stale_stable_nodes = false;

  	hlist_for_each_entry_safe(cur, next_safe,
  				  &chain_head->hlist, hlist_dup) {
  		cond_resched();
  		/*
  		 * Pruning stale stable nodes during lookup means walking
  		 * every dup.
  		 *
  		 * get_ksm_page can unlink nodes from the chain's hlist
  		 * when they point to freed pages (hence the _safe walk),
  		 * and "cur" itself is freed from under us whenever it
  		 * returns NULL.
  		 */
  		cur_page = get_ksm_page(cur, GET_KSM_PAGE_NOLOCK);
  		if (!cur_page)
  			continue;
  		live_dups++;

  		/* Not a candidate, or no better than the current pick. */
  		if (!is_page_sharing_candidate(cur) ||
  		    (best && cur->rmap_hlist_len <= best_len)) {
  			put_page(cur_page);
  			continue;
  		}

  		/* Drop the page of the pick that is being superseded. */
  		if (best)
  			put_page(best_page);
  		best = cur;
  		best_len = cur->rmap_hlist_len;
  		best_page = cur_page;

  		/* The page of the picked dup is kept: no put_page here. */
  		if (!prune_stale_stable_nodes)
  			break;
  	}

  	if (!best) {
  		*_stable_node_dup = NULL;
  		return best_page;
  	}

  	/*
  	 * live_dups counts every dup in the chain only when pruning;
  	 * otherwise the walk may have stopped at live_dups == 1 with
  	 * more entries still behind it.
  	 */
  	if (prune_stale_stable_nodes && live_dups == 1) {
  		/*
  		 * Anything other than exactly one entry here would
  		 * corrupt memory, so better BUG_ON. In KSM context
  		 * with no lock held it's not even fatal.
  		 */
  		BUG_ON(chain_head->hlist.first->next);

  		/*
  		 * A single entry below the deduplication limit:
  		 * drop the chain and let the dup take its place.
  		 */
  		rb_replace_node(&chain_head->node, &best->node, root);
  		free_stable_node(chain_head);
  		ksm_stable_node_chains--;
  		ksm_stable_node_dups--;
  		/*
  		 * NOTE: the caller relies on the stable_node being
  		 * equal to the stable_node_dup once the chain has
  		 * been collapsed.
  		 */
  		*_stable_node = best;
  		/*
  		 * Purely for robustness, so no stale pointer is left
  		 * behind; the compiler is expected to optimize this
  		 * away at build time.
  		 */
  		chain_head = NULL;
  	} else if (chain_head->hlist.first != &best->hlist_dup &&
  		   __is_page_sharing_candidate(best, 1)) {
  		/*
  		 * The picked dup can take one more future merge on
  		 * top of the one underway and does not lead the
  		 * chain: move it to the head so the next search in
  		 * the !prune_stale_stable_nodes case is quicker.
  		 *
  		 * NOTE: testing live_dups > 1 instead of the
  		 * hlist.first pointer would be inaccurate, because
  		 * when pruning live_dups is the total number of dups
  		 * in the chain, not the position of the picked dup.
  		 */
  		hlist_del(&best->hlist_dup);
  		hlist_add_head(&best->hlist_dup, &chain_head->hlist);
  	}

  	*_stable_node_dup = best;
  	return best_page;
  }
  
  static struct stable_node *stable_node_dup_any(struct stable_node *stable_node,
  					       struct rb_root *root)
  {
  	if (!is_stable_node_chain(stable_node))
  		return stable_node;
  	if (hlist_empty(&stable_node->hlist)) {
  		free_stable_node_chain(stable_node, root);
  		return NULL;
  	}
  	return hlist_entry(stable_node->hlist.first,
  			   typeof(*stable_node), hlist_dup);
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
  /*
   * As with get_ksm_page, *_stable_node and *_stable_node_dup may have
   * been freed by this function when the returned tree_page is NULL.
   *
   * When a chain is collapsed, *_stable_node is freed and overwritten
   * with the found stable_node_dup, so that *_stable_node ends up equal
   * to *_stable_node_dup as if the chain had never existed. The caller
   * must check that tree_page is not NULL before dereferencing either
   * *_stable_node or *_stable_node_dup.
   *
   * *_stable_node_dup is a pure output parameter: it is overwritten in
   * all cases and the caller does not need to initialize it.
   */
  static struct page *__stable_node_chain(struct stable_node **_stable_node_dup,
  					struct stable_node **_stable_node,
  					struct rb_root *root,
  					bool prune_stale_stable_nodes)
  {
  	struct stable_node *node = *_stable_node;

  	/* A chain needs its dups walked to find one to share with. */
  	if (is_stable_node_chain(node))
  		return stable_node_dup(_stable_node_dup, _stable_node, root,
  				       prune_stale_stable_nodes);

  	if (!is_page_sharing_candidate(node)) {
  		/*
  		 * A NULL _stable_node_dup tells the caller that the
  		 * stable_node reached the ksm_max_page_sharing limit.
  		 */
  		*_stable_node_dup = NULL;
  		return NULL;
  	}

  	*_stable_node_dup = node;
  	return get_ksm_page(node, GET_KSM_PAGE_NOLOCK);
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1484
1485
1486
  static __always_inline struct page *chain_prune(struct stable_node **s_n_d,
  						struct stable_node **s_n,
  						struct rb_root *root)
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1487
  {
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1488
  	return __stable_node_chain(s_n_d, s_n, root, true);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1489
  }
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1490
1491
1492
  static __always_inline struct page *chain(struct stable_node **s_n_d,
  					  struct stable_node *s_n,
  					  struct rb_root *root)
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1493
  {
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1494
1495
1496
1497
1498
1499
1500
  	struct stable_node *old_stable_node = s_n;
  	struct page *tree_page;
  
  	tree_page = __stable_node_chain(s_n_d, &s_n, root, false);
  	/* not pruning dups so s_n cannot have changed */
  	VM_BUG_ON(s_n != old_stable_node);
  	return tree_page;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1501
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1502
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1503
   * stable_tree_search - search for page inside the stable tree
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1504
1505
1506
1507
   *
   * This function checks if there is a page inside the stable tree
   * with identical content to the page that we are scanning right now.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1508
   * This function returns the stable tree node of identical content if found,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1509
1510
   * NULL otherwise.
   */
62b61f611   Hugh Dickins   ksm: memory hotre...
1511
  static struct page *stable_tree_search(struct page *page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1512
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1513
  	int nid;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1514
  	struct rb_root *root;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1515
1516
  	struct rb_node **new;
  	struct rb_node *parent;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1517
  	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1518
  	struct stable_node *page_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1519

4146d2d67   Hugh Dickins   ksm: make !merge_...
1520
1521
1522
  	page_node = page_stable_node(page);
  	if (page_node && page_node->head != &migrate_nodes) {
  		/* ksm page forked */
08beca44d   Hugh Dickins   ksm: stable_node ...
1523
  		get_page(page);
62b61f611   Hugh Dickins   ksm: memory hotre...
1524
  		return page;
08beca44d   Hugh Dickins   ksm: stable_node ...
1525
  	}
90bd6fd31   Petr Holasek   ksm: allow trees ...
1526
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1527
  	root = root_stable_tree + nid;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1528
  again:
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1529
  	new = &root->rb_node;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1530
  	parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1531

4146d2d67   Hugh Dickins   ksm: make !merge_...
1532
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1533
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1534
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1535
  		cond_resched();
4146d2d67   Hugh Dickins   ksm: make !merge_...
1536
  		stable_node = rb_entry(*new, struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1537
  		stable_node_any = NULL;
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1538
  		tree_page = chain_prune(&stable_node_dup, &stable_node,	root);
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1539
1540
1541
1542
1543
1544
  		/*
  		 * NOTE: stable_node may have been freed by
  		 * chain_prune() if the returned stable_node_dup is
  		 * not NULL. stable_node_dup may have been inserted in
  		 * the rbtree instead as a regular stable_node (in
  		 * order to collapse the stable_node chain if a single
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1545
1546
1547
1548
1549
  		 * stable_node dup was found in it). In such case the
  		 * stable_node is overwritten by the calleee to point
  		 * to the stable_node_dup that was collapsed in the
  		 * stable rbtree and stable_node will be equal to
  		 * stable_node_dup like if the chain never existed.
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1550
  		 */
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
  		if (!stable_node_dup) {
  			/*
  			 * Either all stable_node dups were full in
  			 * this stable_node chain, or this chain was
  			 * empty and should be rb_erased.
  			 */
  			stable_node_any = stable_node_dup_any(stable_node,
  							      root);
  			if (!stable_node_any) {
  				/* rb_erase just run */
  				goto again;
  			}
  			/*
  			 * Take any of the stable_node dups page of
  			 * this stable_node chain to let the tree walk
  			 * continue. All KSM pages belonging to the
  			 * stable_node dups in a stable_node chain
  			 * have the same content and they're
  			 * wrprotected at all times. Any will work
  			 * fine to continue the walk.
  			 */
2cee57d1b   Yang Shi   mm: ksm: do not b...
1572
1573
  			tree_page = get_ksm_page(stable_node_any,
  						 GET_KSM_PAGE_NOLOCK);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1574
1575
  		}
  		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
  		if (!tree_page) {
  			/*
  			 * If we walked over a stale stable_node,
  			 * get_ksm_page() will call rb_erase() and it
  			 * may rebalance the tree from under us. So
  			 * restart the search from scratch. Returning
  			 * NULL would be safe too, but we'd generate
  			 * false negative insertions just because some
  			 * stable_node was stale.
  			 */
  			goto again;
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1588

4035c07a8   Hugh Dickins   ksm: take keyhole...
1589
  		ret = memcmp_pages(page, tree_page);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1590
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1591

4146d2d67   Hugh Dickins   ksm: make !merge_...
1592
  		parent = *new;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1593
  		if (ret < 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1594
  			new = &parent->rb_left;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1595
  		else if (ret > 0)
4146d2d67   Hugh Dickins   ksm: make !merge_...
1596
  			new = &parent->rb_right;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1597
  		else {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
  			if (page_node) {
  				VM_BUG_ON(page_node->head != &migrate_nodes);
  				/*
  				 * Test if the migrated page should be merged
  				 * into a stable node dup. If the mapcount is
  				 * 1 we can migrate it with another KSM page
  				 * without adding it to the chain.
  				 */
  				if (page_mapcount(page) > 1)
  					goto chain_append;
  			}
  
  			if (!stable_node_dup) {
  				/*
  				 * If the stable_node is a chain and
  				 * we got a payload match in memcmp
  				 * but we cannot merge the scanned
  				 * page in any of the existing
  				 * stable_node dups because they're
  				 * all full, we need to wait the
  				 * scanned page to find itself a match
  				 * in the unstable tree to create a
  				 * brand new KSM page to add later to
  				 * the dups of this stable_node.
  				 */
  				return NULL;
  			}
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1625
1626
1627
1628
1629
1630
1631
  			/*
  			 * Lock and unlock the stable_node's page (which
  			 * might already have been migrated) so that page
  			 * migration is sure to notice its raised count.
  			 * It would be more elegant to return stable_node
  			 * than kpage, but that involves more changes.
  			 */
2cee57d1b   Yang Shi   mm: ksm: do not b...
1632
1633
1634
1635
1636
  			tree_page = get_ksm_page(stable_node_dup,
  						 GET_KSM_PAGE_TRYLOCK);
  
  			if (PTR_ERR(tree_page) == -EBUSY)
  				return ERR_PTR(-EBUSY);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1637
1638
1639
1640
1641
  			if (unlikely(!tree_page))
  				/*
  				 * The tree may have been rebalanced,
  				 * so re-evaluate parent and new.
  				 */
4146d2d67   Hugh Dickins   ksm: make !merge_...
1642
  				goto again;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1643
1644
1645
1646
1647
1648
1649
1650
  			unlock_page(tree_page);
  
  			if (get_kpfn_nid(stable_node_dup->kpfn) !=
  			    NUMA(stable_node_dup->nid)) {
  				put_page(tree_page);
  				goto replace;
  			}
  			return tree_page;
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
1651
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1652
  	}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1653
1654
1655
1656
1657
1658
  	if (!page_node)
  		return NULL;
  
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	rb_link_node(&page_node->node, parent, new);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1659
  	rb_insert_color(&page_node->node, root);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1660
1661
1662
1663
1664
1665
  out:
  	if (is_page_sharing_candidate(page_node)) {
  		get_page(page);
  		return page;
  	} else
  		return NULL;
4146d2d67   Hugh Dickins   ksm: make !merge_...
1666
1667
  
  replace:
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1668
1669
  	/*
  	 * If stable_node was a chain and chain_prune collapsed it,
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1670
1671
1672
1673
1674
  	 * stable_node has been updated to be the new regular
  	 * stable_node. A collapse of the chain is indistinguishable
  	 * from the case there was no chain in the stable
  	 * rbtree. Otherwise stable_node is the chain and
  	 * stable_node_dup is the dup to replace.
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1675
  	 */
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1676
  	if (stable_node_dup == stable_node) {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1677
1678
  		VM_BUG_ON(is_stable_node_chain(stable_node_dup));
  		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1679
1680
1681
1682
1683
  		/* there is no chain */
  		if (page_node) {
  			VM_BUG_ON(page_node->head != &migrate_nodes);
  			list_del(&page_node->list);
  			DO_NUMA(page_node->nid = nid);
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1684
1685
  			rb_replace_node(&stable_node_dup->node,
  					&page_node->node,
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1686
1687
1688
1689
1690
1691
  					root);
  			if (is_page_sharing_candidate(page_node))
  				get_page(page);
  			else
  				page = NULL;
  		} else {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1692
  			rb_erase(&stable_node_dup->node, root);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1693
1694
  			page = NULL;
  		}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1695
  	} else {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
  		VM_BUG_ON(!is_stable_node_chain(stable_node));
  		__stable_node_dup_del(stable_node_dup);
  		if (page_node) {
  			VM_BUG_ON(page_node->head != &migrate_nodes);
  			list_del(&page_node->list);
  			DO_NUMA(page_node->nid = nid);
  			stable_node_chain_add_dup(page_node, stable_node);
  			if (is_page_sharing_candidate(page_node))
  				get_page(page);
  			else
  				page = NULL;
  		} else {
  			page = NULL;
  		}
4146d2d67   Hugh Dickins   ksm: make !merge_...
1710
  	}
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1711
1712
  	stable_node_dup->head = &migrate_nodes;
  	list_add(&stable_node_dup->list, stable_node_dup->head);
4146d2d67   Hugh Dickins   ksm: make !merge_...
1713
  	return page;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1714
1715
1716
1717
1718
  
  chain_append:
  	/* stable_node_dup could be null if it reached the limit */
  	if (!stable_node_dup)
  		stable_node_dup = stable_node_any;
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1719
1720
  	/*
  	 * If stable_node was a chain and chain_prune collapsed it,
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1721
1722
1723
1724
1725
  	 * stable_node has been updated to be the new regular
  	 * stable_node. A collapse of the chain is indistinguishable
  	 * from the case there was no chain in the stable
  	 * rbtree. Otherwise stable_node is the chain and
  	 * stable_node_dup is the dup to replace.
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1726
  	 */
0ba1d0f7c   Andrea Arcangeli   ksm: cleanup stab...
1727
  	if (stable_node_dup == stable_node) {
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1728
1729
  		VM_BUG_ON(is_stable_node_chain(stable_node_dup));
  		VM_BUG_ON(is_stable_node_dup(stable_node_dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
  		/* chain is missing so create it */
  		stable_node = alloc_stable_node_chain(stable_node_dup,
  						      root);
  		if (!stable_node)
  			return NULL;
  	}
  	/*
  	 * Add this stable_node dup that was
  	 * migrated to the stable_node chain
  	 * of the current nid for this page
  	 * content.
  	 */
b4fecc67c   Andrea Arcangeli   ksm: fix use afte...
1742
1743
  	VM_BUG_ON(!is_stable_node_chain(stable_node));
  	VM_BUG_ON(!is_stable_node_dup(stable_node_dup));
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1744
1745
1746
1747
1748
  	VM_BUG_ON(page_node->head != &migrate_nodes);
  	list_del(&page_node->list);
  	DO_NUMA(page_node->nid = nid);
  	stable_node_chain_add_dup(page_node, stable_node);
  	goto out;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1749
1750
1751
  }
  
  /*
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1752
   * stable_tree_insert - insert stable tree node pointing to new ksm page
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1753
1754
   * into the stable tree.
   *
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1755
1756
   * This function returns the stable tree node just allocated on success,
   * NULL otherwise.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1757
   */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1758
  static struct stable_node *stable_tree_insert(struct page *kpage)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1759
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1760
1761
  	int nid;
  	unsigned long kpfn;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1762
  	struct rb_root *root;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1763
  	struct rb_node **new;
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1764
  	struct rb_node *parent;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1765
1766
  	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
  	bool need_chain = false;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1767

90bd6fd31   Petr Holasek   ksm: allow trees ...
1768
1769
  	kpfn = page_to_pfn(kpage);
  	nid = get_kpfn_nid(kpfn);
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1770
  	root = root_stable_tree + nid;
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1771
1772
  again:
  	parent = NULL;
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1773
  	new = &root->rb_node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1774

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1775
  	while (*new) {
4035c07a8   Hugh Dickins   ksm: take keyhole...
1776
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1777
  		int ret;
08beca44d   Hugh Dickins   ksm: stable_node ...
1778
  		cond_resched();
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1779
  		stable_node = rb_entry(*new, struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1780
  		stable_node_any = NULL;
8dc5ffcd5   Andrea Arcangeli   ksm: swap the two...
1781
  		tree_page = chain(&stable_node_dup, stable_node, root);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
  		if (!stable_node_dup) {
  			/*
  			 * Either all stable_node dups were full in
  			 * this stable_node chain, or this chain was
  			 * empty and should be rb_erased.
  			 */
  			stable_node_any = stable_node_dup_any(stable_node,
  							      root);
  			if (!stable_node_any) {
  				/* rb_erase just run */
  				goto again;
  			}
  			/*
  			 * Take any of the stable_node dups page of
  			 * this stable_node chain to let the tree walk
  			 * continue. All KSM pages belonging to the
  			 * stable_node dups in a stable_node chain
  			 * have the same content and they're
  			 * wrprotected at all times. Any will work
  			 * fine to continue the walk.
  			 */
2cee57d1b   Yang Shi   mm: ksm: do not b...
1803
1804
  			tree_page = get_ksm_page(stable_node_any,
  						 GET_KSM_PAGE_NOLOCK);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1805
1806
  		}
  		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
f2e5ff85e   Andrea Arcangeli   ksm: don't fail s...
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
  		if (!tree_page) {
  			/*
  			 * If we walked over a stale stable_node,
  			 * get_ksm_page() will call rb_erase() and it
  			 * may rebalance the tree from under us. So
  			 * restart the search from scratch. Returning
  			 * NULL would be safe too, but we'd generate
  			 * false negative insertions just because some
  			 * stable_node was stale.
  			 */
  			goto again;
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1819

4035c07a8   Hugh Dickins   ksm: take keyhole...
1820
1821
  		ret = memcmp_pages(kpage, tree_page);
  		put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1822
1823
1824
1825
1826
1827
1828
  
  		parent = *new;
  		if (ret < 0)
  			new = &parent->rb_left;
  		else if (ret > 0)
  			new = &parent->rb_right;
  		else {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1829
1830
  			need_chain = true;
  			break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1831
1832
  		}
  	}
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1833
1834
  	stable_node_dup = alloc_stable_node();
  	if (!stable_node_dup)
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1835
  		return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1836

2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
  	INIT_HLIST_HEAD(&stable_node_dup->hlist);
  	stable_node_dup->kpfn = kpfn;
  	set_page_stable_node(kpage, stable_node_dup);
  	stable_node_dup->rmap_hlist_len = 0;
  	DO_NUMA(stable_node_dup->nid = nid);
  	if (!need_chain) {
  		rb_link_node(&stable_node_dup->node, parent, new);
  		rb_insert_color(&stable_node_dup->node, root);
  	} else {
  		if (!is_stable_node_chain(stable_node)) {
  			struct stable_node *orig = stable_node;
  			/* chain is missing so create it */
  			stable_node = alloc_stable_node_chain(orig, root);
  			if (!stable_node) {
  				free_stable_node(stable_node_dup);
  				return NULL;
  			}
  		}
  		stable_node_chain_add_dup(stable_node_dup, stable_node);
  	}
08beca44d   Hugh Dickins   ksm: stable_node ...
1857

2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1858
  	return stable_node_dup;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1859
1860
1861
  }
  
  /*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1862
1863
   * unstable_tree_search_insert - search for identical page,
   * else insert rmap_item into the unstable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
   *
   * This function searches for a page in the unstable tree identical to the
   * page currently being scanned; and if no identical page is found in the
   * tree, we insert rmap_item as a new object into the unstable tree.
   *
   * This function returns pointer to rmap_item found to be identical
   * to the currently scanned page, NULL otherwise.
   *
   * This function does both searching and inserting, because they share
   * the same walking algorithm in an rbtree.
   */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1875
1876
1877
1878
  static
  struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
  					      struct page *page,
  					      struct page **tree_pagep)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1879
  {
90bd6fd31   Petr Holasek   ksm: allow trees ...
1880
1881
  	struct rb_node **new;
  	struct rb_root *root;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1882
  	struct rb_node *parent = NULL;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1883
1884
1885
  	int nid;
  
  	nid = get_kpfn_nid(page_to_pfn(page));
ef53d16cd   Hugh Dickins   ksm: allocate roo...
1886
  	root = root_unstable_tree + nid;
90bd6fd31   Petr Holasek   ksm: allow trees ...
1887
  	new = &root->rb_node;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1888
1889
1890
  
  	while (*new) {
  		struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1891
  		struct page *tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1892
  		int ret;
d178f27fc   Hugh Dickins   ksm: cond_resched...
1893
  		cond_resched();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1894
  		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1895
  		tree_page = get_mergeable_page(tree_rmap_item);
c8f95ed1a   Andrea Arcangeli   ksm: unstable_tre...
1896
  		if (!tree_page)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1897
1898
1899
  			return NULL;
  
  		/*
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1900
  		 * Don't substitute a ksm page for a forked page.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1901
  		 */
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1902
1903
  		if (page == tree_page) {
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1904
1905
  			return NULL;
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1906
  		ret = memcmp_pages(page, tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1907
1908
1909
  
  		parent = *new;
  		if (ret < 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1910
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1911
1912
  			new = &parent->rb_left;
  		} else if (ret > 0) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1913
  			put_page(tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1914
  			new = &parent->rb_right;
b599cbdf1   Hugh Dickins   ksm: treat unstab...
1915
1916
1917
1918
1919
1920
1921
1922
1923
  		} else if (!ksm_merge_across_nodes &&
  			   page_to_nid(tree_page) != nid) {
  			/*
  			 * If tree_page has been migrated to another NUMA node,
  			 * it will be flushed out and put in the right unstable
  			 * tree next time: only merge with it when across_nodes.
  			 */
  			put_page(tree_page);
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1924
  		} else {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1925
  			*tree_pagep = tree_page;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1926
1927
1928
  			return tree_rmap_item;
  		}
  	}
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1929
  	rmap_item->address |= UNSTABLE_FLAG;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1930
  	rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
e850dcf53   Hugh Dickins   ksm: trivial tidyups
1931
  	DO_NUMA(rmap_item->nid = nid);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1932
  	rb_link_node(&rmap_item->node, parent, new);
90bd6fd31   Petr Holasek   ksm: allow trees ...
1933
  	rb_insert_color(&rmap_item->node, root);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1934

473b0ce4d   Hugh Dickins   ksm: pages_unshar...
1935
  	ksm_pages_unshared++;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1936
1937
1938
1939
1940
1941
1942
1943
1944
  	return NULL;
  }
  
  /*
   * stable_tree_append - add another rmap_item to the linked list of
   * rmap_items hanging off a given node of the stable tree, all sharing
   * the same ksm page.
   *
   * @max_page_sharing_bypass suppresses the warning otherwise issued when
   * the node's rmap_hlist_len grows beyond ksm_max_page_sharing.
   */
  static void stable_tree_append(struct rmap_item *rmap_item,
  			       struct stable_node *stable_node,
  			       bool max_page_sharing_bypass)
  {
  	/*
  	 * rmap won't find this mapping if we don't insert the
  	 * rmap_item in the right stable_node
  	 * duplicate. page_migration could break later if rmap breaks,
  	 * so we can as well crash here. We really need to check for
  	 * rmap_hlist_len == STABLE_NODE_CHAIN, but we can as well check
  	 * for other negative values, as an underflow detected here
  	 * for the first time (and not when decreasing rmap_hlist_len)
  	 * would be a sign of memory corruption in the stable_node.
  	 */
  	BUG_ON(stable_node->rmap_hlist_len < 0);

  	stable_node->rmap_hlist_len++;
  	/* possibly non fatal but unexpected overflow, only warn */
  	WARN_ON_ONCE(!max_page_sharing_bypass &&
  		     stable_node->rmap_hlist_len > ksm_max_page_sharing);

  	rmap_item->address |= STABLE_FLAG;
  	rmap_item->head = stable_node;
  	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);

  	/* first item on the node counts as shared, later ones as sharing */
  	if (!rmap_item->hlist.next)
  		ksm_pages_shared++;
  	else
  		ksm_pages_sharing++;
  }
  
  /*
81464e306   Hugh Dickins   ksm: five little ...
1976
1977
1978
1979
   * cmp_and_merge_page - first see if page can be merged into the stable tree;
   * if not, compare checksum to previous and if it's the same, see if page can
   * be inserted into the unstable tree, or merged with a page already there and
   * both transferred to the stable tree.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1980
1981
1982
1983
1984
1985
   *
   * @page: the page that we are searching identical page to.
   * @rmap_item: the reverse mapping into the virtual address of this page
   */
  static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
  {
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
1986
  	struct mm_struct *mm = rmap_item->mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1987
  	struct rmap_item *tree_rmap_item;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1988
  	struct page *tree_page = NULL;
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
1989
  	struct stable_node *stable_node;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
1990
  	struct page *kpage;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1991
1992
  	unsigned int checksum;
  	int err;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1993
  	bool max_page_sharing_bypass = false;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
1994

4146d2d67   Hugh Dickins   ksm: make !merge_...
1995
1996
1997
  	stable_node = page_stable_node(page);
  	if (stable_node) {
  		if (stable_node->head != &migrate_nodes &&
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
1998
1999
2000
  		    get_kpfn_nid(READ_ONCE(stable_node->kpfn)) !=
  		    NUMA(stable_node->nid)) {
  			stable_node_dup_del(stable_node);
4146d2d67   Hugh Dickins   ksm: make !merge_...
2001
2002
2003
2004
2005
2006
  			stable_node->head = &migrate_nodes;
  			list_add(&stable_node->list, stable_node->head);
  		}
  		if (stable_node->head != &migrate_nodes &&
  		    rmap_item->head == stable_node)
  			return;
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2007
2008
2009
2010
2011
2012
  		/*
  		 * If it's a KSM fork, allow it to go over the sharing limit
  		 * without warnings.
  		 */
  		if (!is_page_sharing_candidate(stable_node))
  			max_page_sharing_bypass = true;
4146d2d67   Hugh Dickins   ksm: make !merge_...
2013
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2014
2015
  
  	/* We first start with searching the page inside the stable tree */
62b61f611   Hugh Dickins   ksm: memory hotre...
2016
  	kpage = stable_tree_search(page);
4146d2d67   Hugh Dickins   ksm: make !merge_...
2017
2018
2019
2020
2021
2022
  	if (kpage == page && rmap_item->head == stable_node) {
  		put_page(kpage);
  		return;
  	}
  
  	remove_rmap_item_from_tree(rmap_item);
62b61f611   Hugh Dickins   ksm: memory hotre...
2023
  	if (kpage) {
2cee57d1b   Yang Shi   mm: ksm: do not b...
2024
2025
  		if (PTR_ERR(kpage) == -EBUSY)
  			return;
08beca44d   Hugh Dickins   ksm: stable_node ...
2026
  		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2027
2028
2029
2030
2031
  		if (!err) {
  			/*
  			 * The page was successfully merged:
  			 * add its rmap_item to the stable tree.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
2032
  			lock_page(kpage);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2033
2034
  			stable_tree_append(rmap_item, page_stable_node(kpage),
  					   max_page_sharing_bypass);
5ad646880   Hugh Dickins   ksm: let shared p...
2035
  			unlock_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2036
  		}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2037
  		put_page(kpage);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2038
2039
2040
2041
  		return;
  	}
  
  	/*
4035c07a8   Hugh Dickins   ksm: take keyhole...
2042
2043
2044
2045
  	 * If the hash value of the page has changed from the last time
  	 * we calculated it, this page is changing frequently: therefore we
  	 * don't want to insert it in the unstable tree, and we don't want
  	 * to waste our time searching for something identical to it there.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2046
2047
2048
2049
2050
2051
  	 */
  	checksum = calc_checksum(page);
  	if (rmap_item->oldchecksum != checksum) {
  		rmap_item->oldchecksum = checksum;
  		return;
  	}
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2052
2053
2054
2055
2056
2057
  	/*
  	 * Same checksum as an empty page. We attempt to merge it with the
  	 * appropriate zero page if the user enabled this via sysfs.
  	 */
  	if (ksm_use_zero_pages && (checksum == zero_checksum)) {
  		struct vm_area_struct *vma;
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
2058
2059
  		down_read(&mm->mmap_sem);
  		vma = find_mergeable_vma(mm, rmap_item->address);
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2060
2061
  		err = try_to_merge_one_page(vma, page,
  					    ZERO_PAGE(rmap_item->address));
4b22927f0   Kirill Tkhai   ksm: fix unlocked...
2062
  		up_read(&mm->mmap_sem);
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2063
2064
2065
2066
2067
2068
2069
  		/*
  		 * In case of failure, the page was not really empty, so we
  		 * need to continue. Otherwise we're done.
  		 */
  		if (!err)
  			return;
  	}
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2070
2071
  	tree_rmap_item =
  		unstable_tree_search_insert(rmap_item, page, &tree_page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2072
  	if (tree_rmap_item) {
77da2ba06   Claudio Imbrenda   mm/ksm: fix inter...
2073
  		bool split;
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2074
2075
  		kpage = try_to_merge_two_pages(rmap_item, page,
  						tree_rmap_item, tree_page);
77da2ba06   Claudio Imbrenda   mm/ksm: fix inter...
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
  		/*
  		 * If both pages we tried to merge belong to the same compound
  		 * page, then we actually ended up increasing the reference
  		 * count of the same compound page twice, and split_huge_page
  		 * failed.
  		 * Here we set a flag if that happened, and we use it later to
  		 * try split_huge_page again. Since we call put_page right
  		 * afterwards, the reference count will be correct and
  		 * split_huge_page should succeed.
  		 */
  		split = PageTransCompound(page)
  			&& compound_head(page) == compound_head(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2088
  		put_page(tree_page);
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2089
  		if (kpage) {
bc56620b4   Hugh Dickins   ksm: shrink 32-bi...
2090
2091
2092
2093
  			/*
  			 * The pages were successfully merged: insert new
  			 * node in the stable tree and add both rmap_items.
  			 */
5ad646880   Hugh Dickins   ksm: let shared p...
2094
  			lock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2095
2096
  			stable_node = stable_tree_insert(kpage);
  			if (stable_node) {
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2097
2098
2099
2100
  				stable_tree_append(tree_rmap_item, stable_node,
  						   false);
  				stable_tree_append(rmap_item, stable_node,
  						   false);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2101
  			}
5ad646880   Hugh Dickins   ksm: let shared p...
2102
  			unlock_page(kpage);
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2103

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2104
2105
2106
2107
2108
2109
  			/*
  			 * If we fail to insert the page into the stable tree,
  			 * we will have 2 virtual addresses that are pointing
  			 * to a ksm page left outside the stable tree,
  			 * in which case we need to break_cow on both.
  			 */
7b6ba2c7d   Hugh Dickins   ksm: separate sta...
2110
  			if (!stable_node) {
8dd3557a5   Hugh Dickins   ksm: cleanup some...
2111
2112
  				break_cow(tree_rmap_item);
  				break_cow(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2113
  			}
77da2ba06   Claudio Imbrenda   mm/ksm: fix inter...
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
  		} else if (split) {
  			/*
  			 * We are here if we tried to merge two pages and
  			 * failed because they both belonged to the same
  			 * compound page. We will split the page now, but no
  			 * merging will take place.
  			 * We do not want to add the cost of a full lock; if
  			 * the page is locked, it is better to skip it and
  			 * perhaps try again later.
  			 */
  			if (!trylock_page(page))
  				return;
  			split_huge_page(page);
  			unlock_page(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2128
  		}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2129
2130
2131
2132
  	}
  }
  
  static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
2133
  					    struct rmap_item **rmap_list,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2134
2135
2136
  					    unsigned long addr)
  {
  	struct rmap_item *rmap_item;
6514d511d   Hugh Dickins   ksm: singly-linke...
2137
2138
  	while (*rmap_list) {
  		rmap_item = *rmap_list;
93d17715a   Hugh Dickins   ksm: three remove...
2139
  		if ((rmap_item->address & PAGE_MASK) == addr)
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2140
  			return rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2141
2142
  		if (rmap_item->address > addr)
  			break;
6514d511d   Hugh Dickins   ksm: singly-linke...
2143
  		*rmap_list = rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2144
  		remove_rmap_item_from_tree(rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2145
2146
2147
2148
2149
2150
2151
2152
  		free_rmap_item(rmap_item);
  	}
  
  	rmap_item = alloc_rmap_item();
  	if (rmap_item) {
  		/* It has already been zeroed */
  		rmap_item->mm = mm_slot->mm;
  		rmap_item->address = addr;
6514d511d   Hugh Dickins   ksm: singly-linke...
2153
2154
  		rmap_item->rmap_list = *rmap_list;
  		*rmap_list = rmap_item;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
  	}
  	return rmap_item;
  }
  
  static struct rmap_item *scan_get_next_rmap_item(struct page **page)
  {
  	struct mm_struct *mm;
  	struct mm_slot *slot;
  	struct vm_area_struct *vma;
  	struct rmap_item *rmap_item;
90bd6fd31   Petr Holasek   ksm: allow trees ...
2165
  	int nid;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2166
2167
2168
2169
2170
2171
  
  	if (list_empty(&ksm_mm_head.mm_list))
  		return NULL;
  
  	slot = ksm_scan.mm_slot;
  	if (slot == &ksm_mm_head) {
2919bfd07   Hugh Dickins   ksm: drain pageve...
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
  		/*
  		 * A number of pages can hang around indefinitely on per-cpu
  		 * pagevecs, raised page count preventing write_protect_page
  		 * from merging them.  Though it doesn't really matter much,
  		 * it is puzzling to see some stuck in pages_volatile until
  		 * other activity jostles them out, and they also prevented
  		 * LTP's KSM test from succeeding deterministically; so drain
  		 * them here (here rather than on entry to ksm_do_scan(),
  		 * so we don't IPI too often when pages_to_scan is set low).
  		 */
  		lru_add_drain_all();
4146d2d67   Hugh Dickins   ksm: make !merge_...
2183
2184
2185
2186
2187
2188
2189
  		/*
  		 * Whereas stale stable_nodes on the stable_tree itself
  		 * get pruned in the regular course of stable_tree_search(),
  		 * those moved out to the migrate_nodes list can accumulate:
  		 * so prune them once before each full scan.
  		 */
  		if (!ksm_merge_across_nodes) {
036404183   Geliang Tang   mm/ksm.c: use lis...
2190
  			struct stable_node *stable_node, *next;
4146d2d67   Hugh Dickins   ksm: make !merge_...
2191
  			struct page *page;
036404183   Geliang Tang   mm/ksm.c: use lis...
2192
2193
  			list_for_each_entry_safe(stable_node, next,
  						 &migrate_nodes, list) {
2cee57d1b   Yang Shi   mm: ksm: do not b...
2194
2195
  				page = get_ksm_page(stable_node,
  						    GET_KSM_PAGE_NOLOCK);
4146d2d67   Hugh Dickins   ksm: make !merge_...
2196
2197
2198
2199
2200
  				if (page)
  					put_page(page);
  				cond_resched();
  			}
  		}
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2201
  		for (nid = 0; nid < ksm_nr_node_ids; nid++)
90bd6fd31   Petr Holasek   ksm: allow trees ...
2202
  			root_unstable_tree[nid] = RB_ROOT;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2203
2204
2205
2206
2207
  
  		spin_lock(&ksm_mmlist_lock);
  		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
  		ksm_scan.mm_slot = slot;
  		spin_unlock(&ksm_mmlist_lock);
2b472611a   Hugh Dickins   ksm: fix NULL poi...
2208
2209
2210
2211
2212
2213
  		/*
  		 * Although we tested list_empty() above, a racing __ksm_exit
  		 * of the last mm on the list may have removed it since then.
  		 */
  		if (slot == &ksm_mm_head)
  			return NULL;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2214
2215
  next_mm:
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
2216
  		ksm_scan.rmap_list = &slot->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2217
2218
2219
2220
  	}
  
  	mm = slot->mm;
  	down_read(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2221
2222
2223
2224
2225
2226
  	if (ksm_test_exit(mm))
  		vma = NULL;
  	else
  		vma = find_vma(mm, ksm_scan.address);
  
  	for (; vma; vma = vma->vm_next) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2227
2228
2229
2230
2231
2232
2233
2234
  		if (!(vma->vm_flags & VM_MERGEABLE))
  			continue;
  		if (ksm_scan.address < vma->vm_start)
  			ksm_scan.address = vma->vm_start;
  		if (!vma->anon_vma)
  			ksm_scan.address = vma->vm_end;
  
  		while (ksm_scan.address < vma->vm_end) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
2235
2236
  			if (ksm_test_exit(mm))
  				break;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2237
  			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
21ae5b017   Andrea Arcangeli   thp: skip transhu...
2238
2239
2240
2241
2242
  			if (IS_ERR_OR_NULL(*page)) {
  				ksm_scan.address += PAGE_SIZE;
  				cond_resched();
  				continue;
  			}
f765f5405   Kirill A. Shutemov   ksm: prepare to n...
2243
  			if (PageAnon(*page)) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2244
2245
2246
  				flush_anon_page(vma, *page, ksm_scan.address);
  				flush_dcache_page(*page);
  				rmap_item = get_next_rmap_item(slot,
6514d511d   Hugh Dickins   ksm: singly-linke...
2247
  					ksm_scan.rmap_list, ksm_scan.address);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2248
  				if (rmap_item) {
6514d511d   Hugh Dickins   ksm: singly-linke...
2249
2250
  					ksm_scan.rmap_list =
  							&rmap_item->rmap_list;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2251
2252
2253
2254
2255
2256
  					ksm_scan.address += PAGE_SIZE;
  				} else
  					put_page(*page);
  				up_read(&mm->mmap_sem);
  				return rmap_item;
  			}
21ae5b017   Andrea Arcangeli   thp: skip transhu...
2257
  			put_page(*page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2258
2259
2260
2261
  			ksm_scan.address += PAGE_SIZE;
  			cond_resched();
  		}
  	}
9ba692948   Hugh Dickins   ksm: fix oom dead...
2262
2263
  	if (ksm_test_exit(mm)) {
  		ksm_scan.address = 0;
6514d511d   Hugh Dickins   ksm: singly-linke...
2264
  		ksm_scan.rmap_list = &slot->rmap_list;
9ba692948   Hugh Dickins   ksm: fix oom dead...
2265
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2266
2267
2268
2269
  	/*
  	 * Nuke all the rmap_items that are above this current rmap:
  	 * because there were no VM_MERGEABLE vmas with such addresses.
  	 */
6514d511d   Hugh Dickins   ksm: singly-linke...
2270
  	remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2271
2272
  
  	spin_lock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
2273
2274
2275
2276
2277
2278
2279
  	ksm_scan.mm_slot = list_entry(slot->mm_list.next,
  						struct mm_slot, mm_list);
  	if (ksm_scan.address == 0) {
  		/*
  		 * We've completed a full scan of all vmas, holding mmap_sem
  		 * throughout, and found no VM_MERGEABLE: so do the same as
  		 * __ksm_exit does to remove this mm from all our lists now.
9ba692948   Hugh Dickins   ksm: fix oom dead...
2280
2281
2282
2283
  		 * This applies either when cleaning up after __ksm_exit
  		 * (but beware: we can reach here even before __ksm_exit),
  		 * or when all VM_MERGEABLE areas have been unmapped (and
  		 * mmap_sem then protects against race with MADV_MERGEABLE).
cd551f975   Hugh Dickins   ksm: distribute r...
2284
  		 */
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
2285
  		hash_del(&slot->link);
cd551f975   Hugh Dickins   ksm: distribute r...
2286
  		list_del(&slot->mm_list);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2287
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
2288
2289
  		free_mm_slot(slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2290
2291
2292
  		up_read(&mm->mmap_sem);
  		mmdrop(mm);
  	} else {
9ba692948   Hugh Dickins   ksm: fix oom dead...
2293
  		up_read(&mm->mmap_sem);
7496fea9a   Zhou Chengming   ksm: fix conflict...
2294
2295
2296
2297
2298
2299
2300
2301
  		/*
  		 * up_read(&mm->mmap_sem) first because after
  		 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
  		 * already have been freed under us by __ksm_exit()
  		 * because the "mm_slot" is still hashed and
  		 * ksm_scan.mm_slot doesn't point to it anymore.
  		 */
  		spin_unlock(&ksm_mmlist_lock);
cd551f975   Hugh Dickins   ksm: distribute r...
2302
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2303
2304
  
  	/* Repeat until we've completed scanning the whole list */
cd551f975   Hugh Dickins   ksm: distribute r...
2305
  	slot = ksm_scan.mm_slot;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2306
2307
  	if (slot != &ksm_mm_head)
  		goto next_mm;
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2308
2309
2310
2311
2312
2313
  	ksm_scan.seqnr++;
  	return NULL;
  }
  
  /**
   * ksm_do_scan  - the ksm scanner main worker function.
b7701a5f2   Mike Rapoport   mm: docs: fixup p...
2314
   * @scan_npages:  number of pages we want to scan before we return.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2315
2316
2317
2318
   */
  static void ksm_do_scan(unsigned int scan_npages)
  {
  	struct rmap_item *rmap_item;
22eccdd7d   Dan Carpenter   ksm: check for ER...
2319
  	struct page *uninitialized_var(page);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2320

878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2321
  	while (scan_npages-- && likely(!freezing(current))) {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2322
2323
2324
2325
  		cond_resched();
  		rmap_item = scan_get_next_rmap_item(&page);
  		if (!rmap_item)
  			return;
4146d2d67   Hugh Dickins   ksm: make !merge_...
2326
  		cmp_and_merge_page(page, rmap_item);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2327
2328
2329
  		put_page(page);
  	}
  }
6e1583842   Hugh Dickins   ksm: keep quiet w...
2330
2331
2332
2333
  /*
   * ksmd_should_run - nonzero when KSM_RUN_MERGE is set in ksm_run and at
   * least one mm is queued on ksm_mm_head.mm_list.
   */
  static int ksmd_should_run(void)
  {
  	if (!(ksm_run & KSM_RUN_MERGE))
  		return 0;

  	return !list_empty(&ksm_mm_head.mm_list);
  }
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2334
2335
  static int ksm_scan_thread(void *nothing)
  {
fcf9a0ef8   Kirill Tkhai   ksm: react on cha...
2336
  	unsigned int sleep_ms;
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2337
  	set_freezable();
339aa6246   Izik Eidus   ksm: change ksm n...
2338
  	set_user_nice(current, 5);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2339
2340
  
  	while (!kthread_should_stop()) {
6e1583842   Hugh Dickins   ksm: keep quiet w...
2341
  		mutex_lock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2342
  		wait_while_offlining();
6e1583842   Hugh Dickins   ksm: keep quiet w...
2343
  		if (ksmd_should_run())
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2344
  			ksm_do_scan(ksm_thread_pages_to_scan);
6e1583842   Hugh Dickins   ksm: keep quiet w...
2345
  		mutex_unlock(&ksm_thread_mutex);
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2346
  		try_to_freeze();
6e1583842   Hugh Dickins   ksm: keep quiet w...
2347
  		if (ksmd_should_run()) {
fcf9a0ef8   Kirill Tkhai   ksm: react on cha...
2348
2349
2350
2351
  			sleep_ms = READ_ONCE(ksm_thread_sleep_millisecs);
  			wait_event_interruptible_timeout(ksm_iter_wait,
  				sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs),
  				msecs_to_jiffies(sleep_ms));
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2352
  		} else {
878aee7d6   Andrea Arcangeli   thp: freeze khuge...
2353
  			wait_event_freezable(ksm_thread_wait,
6e1583842   Hugh Dickins   ksm: keep quiet w...
2354
  				ksmd_should_run() || kthread_should_stop());
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2355
2356
2357
2358
  		}
  	}
  	return 0;
  }
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2359
2360
2361
2362
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
  		unsigned long end, int advice, unsigned long *vm_flags)
  {
  	struct mm_struct *mm = vma->vm_mm;
d952b7913   Hugh Dickins   ksm: fix endless ...
2363
  	int err;
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2364
2365
2366
2367
2368
2369
2370
2371
  
  	switch (advice) {
  	case MADV_MERGEABLE:
  		/*
  		 * Be somewhat over-protective for now!
  		 */
  		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
  				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
0661a3361   Kirill A. Shutemov   mm: remove rest u...
2372
  				 VM_HUGETLB | VM_MIXEDMAP))
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2373
  			return 0;		/* just ignore the advice */
e1fb4a086   Dave Jiang   dax: remove VM_MI...
2374
2375
  		if (vma_is_dax(vma))
  			return 0;
cc2383ec0   Konstantin Khlebnikov   mm: introduce arc...
2376
2377
2378
2379
  #ifdef VM_SAO
  		if (*vm_flags & VM_SAO)
  			return 0;
  #endif
74a049674   Khalid Aziz   sparc64: Add supp...
2380
2381
2382
2383
  #ifdef VM_SPARC_ADI
  		if (*vm_flags & VM_SPARC_ADI)
  			return 0;
  #endif
cc2383ec0   Konstantin Khlebnikov   mm: introduce arc...
2384

d952b7913   Hugh Dickins   ksm: fix endless ...
2385
2386
2387
2388
2389
  		if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
  			err = __ksm_enter(mm);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2390
2391
2392
2393
2394
2395
2396
  
  		*vm_flags |= VM_MERGEABLE;
  		break;
  
  	case MADV_UNMERGEABLE:
  		if (!(*vm_flags & VM_MERGEABLE))
  			return 0;		/* just ignore the advice */
d952b7913   Hugh Dickins   ksm: fix endless ...
2397
2398
2399
2400
2401
  		if (vma->anon_vma) {
  			err = unmerge_ksm_pages(vma, start, end);
  			if (err)
  				return err;
  		}
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
  
  		*vm_flags &= ~VM_MERGEABLE;
  		break;
  	}
  
  	return 0;
  }
  
  int __ksm_enter(struct mm_struct *mm)
  {
6e1583842   Hugh Dickins   ksm: keep quiet w...
2412
2413
2414
2415
  	struct mm_slot *mm_slot;
  	int needs_wakeup;
  
  	mm_slot = alloc_mm_slot();
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2416
2417
  	if (!mm_slot)
  		return -ENOMEM;
6e1583842   Hugh Dickins   ksm: keep quiet w...
2418
2419
  	/* Check ksm_run too?  Would need tighter locking */
  	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2420
2421
2422
  	spin_lock(&ksm_mmlist_lock);
  	insert_to_mm_slots_hash(mm, mm_slot);
  	/*
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2423
2424
  	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
  	 * insert just behind the scanning cursor, to let the area settle
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2425
2426
  	 * down a little; when fork is followed by immediate exec, we don't
  	 * want ksmd to waste time setting up and tearing down an rmap_list.
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2427
2428
2429
2430
  	 *
  	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
  	 * scanning cursor, otherwise KSM pages in newly forked mms will be
  	 * missed: then we might as well insert at the end of the list.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2431
  	 */
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2432
2433
2434
2435
  	if (ksm_run & KSM_RUN_UNMERGE)
  		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
  	else
  		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2436
  	spin_unlock(&ksm_mmlist_lock);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2437
  	set_bit(MMF_VM_MERGEABLE, &mm->flags);
f1f100764   Vegard Nossum   mm: add new mmgra...
2438
  	mmgrab(mm);
6e1583842   Hugh Dickins   ksm: keep quiet w...
2439
2440
2441
  
  	if (needs_wakeup)
  		wake_up_interruptible(&ksm_thread_wait);
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2442
2443
  	return 0;
  }
1c2fb7a4c   Andrea Arcangeli   ksm: fix deadlock...
2444
  void __ksm_exit(struct mm_struct *mm)
f8af4da3b   Hugh Dickins   ksm: the mm inter...
2445
  {
cd551f975   Hugh Dickins   ksm: distribute r...
2446
  	struct mm_slot *mm_slot;
9ba692948   Hugh Dickins   ksm: fix oom dead...
2447
  	int easy_to_free = 0;
cd551f975   Hugh Dickins   ksm: distribute r...
2448

31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2449
  	/*
9ba692948   Hugh Dickins   ksm: fix oom dead...
2450
2451
2452
2453
2454
2455
  	 * This process is exiting: if it's straightforward (as is the
  	 * case when ksmd was never running), free mm_slot immediately.
  	 * But if it's at the cursor or has rmap_items linked to it, use
  	 * mmap_sem to synchronize with any break_cows before pagetables
  	 * are freed, and leave the mm_slot on the list for ksmd to free.
  	 * Beware: ksm may already have noticed it exiting and freed the slot.
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2456
  	 */
9ba692948   Hugh Dickins   ksm: fix oom dead...
2457

cd551f975   Hugh Dickins   ksm: distribute r...
2458
2459
  	spin_lock(&ksm_mmlist_lock);
  	mm_slot = get_mm_slot(mm);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2460
  	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
6514d511d   Hugh Dickins   ksm: singly-linke...
2461
  		if (!mm_slot->rmap_list) {
4ca3a69bc   Sasha Levin   mm/ksm.c: use new...
2462
  			hash_del(&mm_slot->link);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2463
2464
2465
2466
2467
2468
  			list_del(&mm_slot->mm_list);
  			easy_to_free = 1;
  		} else {
  			list_move(&mm_slot->mm_list,
  				  &ksm_scan.mm_slot->mm_list);
  		}
cd551f975   Hugh Dickins   ksm: distribute r...
2469
  	}
cd551f975   Hugh Dickins   ksm: distribute r...
2470
  	spin_unlock(&ksm_mmlist_lock);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2471
2472
2473
2474
2475
  	if (easy_to_free) {
  		free_mm_slot(mm_slot);
  		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
  		mmdrop(mm);
  	} else if (mm_slot) {
9ba692948   Hugh Dickins   ksm: fix oom dead...
2476
2477
  		down_write(&mm->mmap_sem);
  		up_write(&mm->mmap_sem);
9ba692948   Hugh Dickins   ksm: fix oom dead...
2478
  	}
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2479
  }
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2480
  struct page *ksm_might_need_to_copy(struct page *page,
5ad646880   Hugh Dickins   ksm: let shared p...
2481
2482
  			struct vm_area_struct *vma, unsigned long address)
  {
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2483
  	struct anon_vma *anon_vma = page_anon_vma(page);
5ad646880   Hugh Dickins   ksm: let shared p...
2484
  	struct page *new_page;
cbf86cfe0   Hugh Dickins   ksm: remove old s...
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
  	if (PageKsm(page)) {
  		if (page_stable_node(page) &&
  		    !(ksm_run & KSM_RUN_UNMERGE))
  			return page;	/* no need to copy it */
  	} else if (!anon_vma) {
  		return page;		/* no need to copy it */
  	} else if (anon_vma->root == vma->anon_vma->root &&
  		 page->index == linear_page_index(vma, address)) {
  		return page;		/* still no need to copy it */
  	}
  	if (!PageUptodate(page))
  		return page;		/* let do_swap_page report the error */
5ad646880   Hugh Dickins   ksm: let shared p...
2497
2498
2499
2500
2501
2502
  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
  	if (new_page) {
  		copy_user_highpage(new_page, page, address, vma);
  
  		SetPageDirty(new_page);
  		__SetPageUptodate(new_page);
48c935ad8   Kirill A. Shutemov   page-flags: defin...
2503
  		__SetPageLocked(new_page);
5ad646880   Hugh Dickins   ksm: let shared p...
2504
  	}
5ad646880   Hugh Dickins   ksm: let shared p...
2505
2506
  	return new_page;
  }
1df631ae1   Minchan Kim   mm: make rmap_wal...
2507
  void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2508
2509
  {
  	struct stable_node *stable_node;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2510
  	struct rmap_item *rmap_item;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2511
  	int search_new_forks = 0;
309381fea   Sasha Levin   mm: dump page whe...
2512
  	VM_BUG_ON_PAGE(!PageKsm(page), page);
9f32624be   Joonsoo Kim   mm/rmap: use rmap...
2513
2514
2515
2516
2517
  
  	/*
  	 * Rely on the page lock to protect against concurrent modifications
  	 * to that page's node of the stable tree.
  	 */
309381fea   Sasha Levin   mm: dump page whe...
2518
  	VM_BUG_ON_PAGE(!PageLocked(page), page);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2519
2520
2521
  
  	stable_node = page_stable_node(page);
  	if (!stable_node)
1df631ae1   Minchan Kim   mm: make rmap_wal...
2522
  		return;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2523
  again:
b67bfe0d4   Sasha Levin   hlist: drop the n...
2524
  	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2525
  		struct anon_vma *anon_vma = rmap_item->anon_vma;
5beb49305   Rik van Riel   mm: change anon_v...
2526
  		struct anon_vma_chain *vmac;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2527
  		struct vm_area_struct *vma;
ad12695f1   Andrea Arcangeli   ksm: add cond_res...
2528
  		cond_resched();
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
2529
  		anon_vma_lock_read(anon_vma);
bf181b9f9   Michel Lespinasse   mm anon rmap: rep...
2530
2531
  		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
  					       0, ULONG_MAX) {
1105a2fc0   Jia He   mm/ksm.c: ignore ...
2532
  			unsigned long addr;
ad12695f1   Andrea Arcangeli   ksm: add cond_res...
2533
  			cond_resched();
5beb49305   Rik van Riel   mm: change anon_v...
2534
  			vma = vmac->vma;
1105a2fc0   Jia He   mm/ksm.c: ignore ...
2535
2536
2537
2538
2539
  
  			/* Ignore the stable/unstable/sqnr flags */
  			addr = rmap_item->address & ~KSM_FLAG_MASK;
  
  			if (addr < vma->vm_start || addr >= vma->vm_end)
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2540
2541
2542
2543
2544
2545
2546
2547
2548
  				continue;
  			/*
  			 * Initially we examine only the vma which covers this
  			 * rmap_item; but later, if there is still work to do,
  			 * we examine covering vmas in other mms: in case they
  			 * were forked from the original since ksmd passed.
  			 */
  			if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
  				continue;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
2549
2550
  			if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
  				continue;
1105a2fc0   Jia He   mm/ksm.c: ignore ...
2551
  			if (!rwc->rmap_one(page, vma, addr, rwc->arg)) {
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
2552
  				anon_vma_unlock_read(anon_vma);
1df631ae1   Minchan Kim   mm: make rmap_wal...
2553
  				return;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2554
  			}
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
2555
2556
  			if (rwc->done && rwc->done(page)) {
  				anon_vma_unlock_read(anon_vma);
1df631ae1   Minchan Kim   mm: make rmap_wal...
2557
  				return;
0dd1c7bbc   Joonsoo Kim   mm/rmap: extend r...
2558
  			}
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2559
  		}
b6b19f25f   Hugh Dickins   ksm: make rmap wa...
2560
  		anon_vma_unlock_read(anon_vma);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2561
2562
2563
  	}
  	if (!search_new_forks++)
  		goto again;
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2564
  }
52d1e606e   Kirill Tkhai   mm: reuse only-pt...
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
  /*
   * Try to hand a KSM page over to @vma as an ordinary anon page mapped at
   * @address.
   *
   * Returns false, leaving the page untouched, if the page is in the swap
   * cache, has no stable node, or its refcount cannot be frozen at 1.
   * Otherwise the anon rmap and page->index are re-pointed at @vma/@address
   * and true is returned.
   */
  bool reuse_ksm_page(struct page *page,
  		    struct vm_area_struct *vma,
  		    unsigned long address)
  {
  #ifdef CONFIG_DEBUG_VM
  	if (WARN_ON(is_zero_pfn(page_to_pfn(page))) ||
  			WARN_ON(!page_mapped(page)) ||
  			WARN_ON(!PageLocked(page))) {
  		dump_page(page, "reuse_ksm_page");
  		return false;
  	}
  #endif

  	if (PageSwapCache(page))
  		return false;
  	if (!page_stable_node(page))
  		return false;

  	/* Prohibit parallel get_ksm_page() */
  	if (!page_ref_freeze(page, 1))
  		return false;

  	page_move_anon_rmap(page, vma);
  	page->index = linear_page_index(vma, address);
  	page_ref_unfreeze(page, 1);

  	return true;
  }
526295064   Joonsoo Kim   mm/rmap: use rmap...
2590
  #ifdef CONFIG_MIGRATION
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2591
2592
2593
  void ksm_migrate_page(struct page *newpage, struct page *oldpage)
  {
  	struct stable_node *stable_node;
309381fea   Sasha Levin   mm: dump page whe...
2594
2595
2596
  	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
  	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
  	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2597
2598
2599
  
  	stable_node = page_stable_node(newpage);
  	if (stable_node) {
309381fea   Sasha Levin   mm: dump page whe...
2600
  		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
62b61f611   Hugh Dickins   ksm: memory hotre...
2601
  		stable_node->kpfn = page_to_pfn(newpage);
c8d6553b9   Hugh Dickins   ksm: make KSM pag...
2602
2603
2604
2605
2606
2607
2608
2609
  		/*
  		 * newpage->mapping was set in advance; now we need smp_wmb()
  		 * to make sure that the new stable_node->kpfn is visible
  		 * to get_ksm_page() before it can see that oldpage->mapping
  		 * has gone stale (or that PageSwapCache has been cleared).
  		 */
  		smp_wmb();
  		set_page_stable_node(oldpage, NULL);
e9995ef97   Hugh Dickins   ksm: rmap_walk to...
2610
2611
2612
  	}
  }
  #endif /* CONFIG_MIGRATION */
62b61f611   Hugh Dickins   ksm: memory hotre...
2613
  #ifdef CONFIG_MEMORY_HOTREMOVE
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2614
2615
2616
2617
2618
  static void wait_while_offlining(void)
  {
  	while (ksm_run & KSM_RUN_OFFLINE) {
  		mutex_unlock(&ksm_thread_mutex);
  		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
743162013   NeilBrown   sched: Remove pro...
2619
  			    TASK_UNINTERRUPTIBLE);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2620
2621
2622
  		mutex_lock(&ksm_thread_mutex);
  	}
  }
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
  /*
   * Remove @stable_node from the stable tree if its kpfn lies in the pfn
   * range [@start_pfn, @end_pfn).
   *
   * Returns true if the node was removed, false if it was out of range.
   */
  static bool stable_node_dup_remove_range(struct stable_node *stable_node,
  					 unsigned long start_pfn,
  					 unsigned long end_pfn)
  {
  	unsigned long pfn = stable_node->kpfn;

  	if (pfn < start_pfn || pfn >= end_pfn)
  		return false;

  	/*
  	 * Don't get_ksm_page, page has already gone:
  	 * which is why we keep kpfn instead of page*
  	 */
  	remove_node_from_stable_tree(stable_node);
  	return true;
  }
  
  static bool stable_node_chain_remove_range(struct stable_node *stable_node,
  					   unsigned long start_pfn,
  					   unsigned long end_pfn,
  					   struct rb_root *root)
  {
  	struct stable_node *dup;
  	struct hlist_node *hlist_safe;
  
  	if (!is_stable_node_chain(stable_node)) {
  		VM_BUG_ON(is_stable_node_dup(stable_node));
  		return stable_node_dup_remove_range(stable_node, start_pfn,
  						    end_pfn);
  	}
  
  	hlist_for_each_entry_safe(dup, hlist_safe,
  				  &stable_node->hlist, hlist_dup) {
  		VM_BUG_ON(!is_stable_node_dup(dup));
  		stable_node_dup_remove_range(dup, start_pfn, end_pfn);
  	}
  	if (hlist_empty(&stable_node->hlist)) {
  		free_stable_node_chain(stable_node, root);
  		return true; /* notify caller that tree was rebalanced */
  	} else
  		return false;
  }
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2664
2665
  static void ksm_check_stable_tree(unsigned long start_pfn,
  				  unsigned long end_pfn)
62b61f611   Hugh Dickins   ksm: memory hotre...
2666
  {
036404183   Geliang Tang   mm/ksm.c: use lis...
2667
  	struct stable_node *stable_node, *next;
62b61f611   Hugh Dickins   ksm: memory hotre...
2668
  	struct rb_node *node;
90bd6fd31   Petr Holasek   ksm: allow trees ...
2669
  	int nid;
62b61f611   Hugh Dickins   ksm: memory hotre...
2670

ef53d16cd   Hugh Dickins   ksm: allocate roo...
2671
2672
  	for (nid = 0; nid < ksm_nr_node_ids; nid++) {
  		node = rb_first(root_stable_tree + nid);
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2673
  		while (node) {
90bd6fd31   Petr Holasek   ksm: allow trees ...
2674
  			stable_node = rb_entry(node, struct stable_node, node);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2675
2676
2677
2678
  			if (stable_node_chain_remove_range(stable_node,
  							   start_pfn, end_pfn,
  							   root_stable_tree +
  							   nid))
ef53d16cd   Hugh Dickins   ksm: allocate roo...
2679
  				node = rb_first(root_stable_tree + nid);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2680
  			else
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2681
2682
  				node = rb_next(node);
  			cond_resched();
90bd6fd31   Petr Holasek   ksm: allow trees ...
2683
  		}
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2684
  	}
036404183   Geliang Tang   mm/ksm.c: use lis...
2685
  	list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) {
4146d2d67   Hugh Dickins   ksm: make !merge_...
2686
2687
2688
2689
2690
  		if (stable_node->kpfn >= start_pfn &&
  		    stable_node->kpfn < end_pfn)
  			remove_node_from_stable_tree(stable_node);
  		cond_resched();
  	}
62b61f611   Hugh Dickins   ksm: memory hotre...
2691
2692
2693
2694
2695
2696
  }
  
  static int ksm_memory_callback(struct notifier_block *self,
  			       unsigned long action, void *arg)
  {
  	struct memory_notify *mn = arg;
62b61f611   Hugh Dickins   ksm: memory hotre...
2697
2698
2699
2700
  
  	switch (action) {
  	case MEM_GOING_OFFLINE:
  		/*
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2701
2702
2703
2704
2705
  		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
  		 * and remove_all_stable_nodes() while memory is going offline:
  		 * it is unsafe for them to touch the stable tree at this time.
  		 * But unmerge_ksm_pages(), rmap lookups and other entry points
  		 * which do not need the ksm_thread_mutex are all safe.
62b61f611   Hugh Dickins   ksm: memory hotre...
2706
  		 */
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2707
2708
2709
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run |= KSM_RUN_OFFLINE;
  		mutex_unlock(&ksm_thread_mutex);
62b61f611   Hugh Dickins   ksm: memory hotre...
2710
2711
2712
2713
2714
2715
  		break;
  
  	case MEM_OFFLINE:
  		/*
  		 * Most of the work is done by page migration; but there might
  		 * be a few stable_nodes left over, still pointing to struct
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2716
2717
2718
  		 * pages which have been offlined: prune those from the tree,
  		 * otherwise get_ksm_page() might later try to access a
  		 * non-existent struct page.
62b61f611   Hugh Dickins   ksm: memory hotre...
2719
  		 */
ee0ea59cf   Hugh Dickins   ksm: reorganize k...
2720
2721
  		ksm_check_stable_tree(mn->start_pfn,
  				      mn->start_pfn + mn->nr_pages);
62b61f611   Hugh Dickins   ksm: memory hotre...
2722
2723
2724
  		/* fallthrough */
  
  	case MEM_CANCEL_OFFLINE:
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2725
2726
  		mutex_lock(&ksm_thread_mutex);
  		ksm_run &= ~KSM_RUN_OFFLINE;
62b61f611   Hugh Dickins   ksm: memory hotre...
2727
  		mutex_unlock(&ksm_thread_mutex);
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2728
2729
2730
  
  		smp_mb();	/* wake_up_bit advises this */
  		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
62b61f611   Hugh Dickins   ksm: memory hotre...
2731
2732
2733
2734
  		break;
  	}
  	return NOTIFY_OK;
  }
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2735
2736
2737
2738
  #else
  /* No-op variant used when CONFIG_MEMORY_HOTREMOVE is not set. */
  static void wait_while_offlining(void)
  {
  }
62b61f611   Hugh Dickins   ksm: memory hotre...
2739
  #endif /* CONFIG_MEMORY_HOTREMOVE */
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
2740
2741
2742
2743
  #ifdef CONFIG_SYSFS
  /*
   * This all compiles without CONFIG_SYSFS, but is a waste of space.
   */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
  /*
   * Helpers that define a static kobj_attribute named <_name>_attr.
   * KSM_ATTR_RO builds it with __ATTR_RO(); KSM_ATTR builds it with mode 0644
   * and wires it to the <_name>_show and <_name>_store functions.
   */
  #define KSM_ATTR_RO(_name) \
  	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KSM_ATTR(_name) \
  	static struct kobj_attribute _name##_attr = \
  		__ATTR(_name, 0644, _name##_show, _name##_store)
  
  static ssize_t sleep_millisecs_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_sleep_millisecs);
  }
  
  /*
   * sysfs store: parse @buf as a base-10 unsigned value and install it as
   * ksm_thread_sleep_millisecs, then wake anything sleeping on ksm_iter_wait
   * so the new interval is noticed.
   *
   * Returns @count on success, or -EINVAL if the text does not parse or the
   * value exceeds UINT_MAX.
   */
  static ssize_t sleep_millisecs_store(struct kobject *kobj,
  				     struct kobj_attribute *attr,
  				     const char *buf, size_t count)
  {
  	unsigned long msecs;
  	int err;

  	err = kstrtoul(buf, 10, &msecs);
  	if (err || msecs > UINT_MAX)
  		return -EINVAL;

  	ksm_thread_sleep_millisecs = msecs;
  	wake_up_interruptible(&ksm_iter_wait);

  	return count;
  }
  KSM_ATTR(sleep_millisecs);
  
  static ssize_t pages_to_scan_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_thread_pages_to_scan);
  }
  
  /*
   * sysfs store: parse @buf as a base-10 unsigned value and install it as
   * ksm_thread_pages_to_scan.
   *
   * Returns @count on success, or -EINVAL if the text does not parse or the
   * value exceeds UINT_MAX.
   */
  static ssize_t pages_to_scan_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long nr_pages;

  	err = kstrtoul(buf, 10, &nr_pages);
  	if (err || nr_pages > UINT_MAX)
  		return -EINVAL;

  	ksm_thread_pages_to_scan = nr_pages;

  	return count;
  }
  KSM_ATTR(pages_to_scan);
  
  static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
  			char *buf)
  {
ef4d43a80   Hugh Dickins   ksm: stop hotremo...
2800
2801
  	return sprintf(buf, "%lu
  ", ksm_run);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
2802
2803
2804
2805
2806
2807
2808
  }
  
  /*
   * sysfs store: change the KSM run mode.
   *
   * Accepts a base-10 value no larger than KSM_RUN_UNMERGE.  When the value
   * differs from ksm_run it is installed under ksm_thread_mutex; selecting
   * KSM_RUN_UNMERGE additionally unmerges everything, and if that fails
   * ksm_run falls back to KSM_RUN_STOP and the error is returned.
   *
   * Returns @count on success, -EINVAL for unparsable or out-of-range input,
   * or the error from unmerge_and_remove_all_rmap_items().
   */
  static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
  			 const char *buf, size_t count)
  {
  	int err;
  	unsigned long flags;

  	err = kstrtoul(buf, 10, &flags);
  	if (err || flags > UINT_MAX)
  		return -EINVAL;
  	if (flags > KSM_RUN_UNMERGE)
  		return -EINVAL;

  	/*
  	 * KSM_RUN_MERGE sets ksmd running, and 0 stops it running.
  	 * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items,
  	 * breaking COW to free the pages_shared (but leaves mm_slots
  	 * on the list for when ksmd may be set running again).
  	 */

  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_run != flags) {
  		ksm_run = flags;
  		if (flags & KSM_RUN_UNMERGE) {
  			set_current_oom_origin();
  			err = unmerge_and_remove_all_rmap_items();
  			clear_current_oom_origin();
  			if (err) {
  				ksm_run = KSM_RUN_STOP;
  				count = err;
  			}
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);

  	if (flags & KSM_RUN_MERGE)
  		wake_up_interruptible(&ksm_thread_wait);

  	return count;
  }
  KSM_ATTR(run);
90bd6fd31   Petr Holasek   ksm: allow trees ...
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
  #ifdef CONFIG_NUMA
  static ssize_t merge_across_nodes_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_merge_across_nodes);
  }
  
  /*
   * sysfs store: switch merging across NUMA nodes on (1) or off (0).
   *
   * The setting can only change while no pages are shared and all stable
   * nodes can be removed.  The first switch away from the single-tree
   * default allocates per-node root arrays for the stable and unstable
   * trees.
   *
   * Returns @count on success, the kstrtoul() error for unparsable input,
   * -EINVAL for values above 1, -EBUSY if the trees are still in use, or
   * -ENOMEM if the root arrays cannot be allocated.
   */
  static ssize_t merge_across_nodes_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	int err;
  	unsigned long knob;

  	err = kstrtoul(buf, 10, &knob);
  	if (err)
  		return err;
  	if (knob > 1)
  		return -EINVAL;

  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_merge_across_nodes != knob) {
  		if (ksm_pages_shared || remove_all_stable_nodes())
  			err = -EBUSY;
  		else if (root_stable_tree == one_stable_tree) {
  			/* Named "roots" so it does not shadow the buf parameter. */
  			struct rb_root *roots;
  			/*
  			 * This is the first time that we switch away from the
  			 * default of merging across nodes: must now allocate
  			 * a buffer to hold as many roots as may be needed.
  			 * Allocate stable and unstable together:
  			 * MAXSMP NODES_SHIFT 10 will use 16kB.
  			 */
  			roots = kcalloc(nr_node_ids + nr_node_ids,
  					sizeof(*roots), GFP_KERNEL);
  			/* Relies on an empty RB_ROOT being all zeroes (NULL) */
  			if (!roots)
  				err = -ENOMEM;
  			else {
  				root_stable_tree = roots;
  				root_unstable_tree = roots + nr_node_ids;
  				/* Stable tree is empty but not the unstable */
  				root_unstable_tree[0] = one_unstable_tree[0];
  			}
  		}
  		if (!err) {
  			ksm_merge_across_nodes = knob;
  			ksm_nr_node_ids = knob ? 1 : nr_node_ids;
  		}
  	}
  	mutex_unlock(&ksm_thread_mutex);

  	return err ? err : count;
  }
  KSM_ATTR(merge_across_nodes);
  #endif
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
  static ssize_t use_zero_pages_show(struct kobject *kobj,
  				struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_use_zero_pages);
  }
  /*
   * sysfs store: parse @buf with kstrtobool() and install the result as
   * ksm_use_zero_pages.
   *
   * Returns @count on success, or -EINVAL if the text is not a valid boolean.
   */
  static ssize_t use_zero_pages_store(struct kobject *kobj,
  				   struct kobj_attribute *attr,
  				   const char *buf, size_t count)
  {
  	bool enable;

  	if (kstrtobool(buf, &enable))
  		return -EINVAL;

  	ksm_use_zero_pages = enable;
  	return count;
  }
  KSM_ATTR(use_zero_pages);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
  static ssize_t max_page_sharing_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_max_page_sharing);
  }
  
  /*
   * sysfs store: set ksm_max_page_sharing from a base-10 integer in @buf.
   *
   * Values below 2 are rejected.  Writing the current value is a no-op.  A
   * new value is installed under ksm_thread_mutex, and only while no pages
   * are shared and all stable nodes can be removed.
   *
   * Returns @count on success, the kstrtoint() error for unparsable input,
   * -EINVAL for values below 2, or -EBUSY if the stable tree is in use.
   */
  static ssize_t max_page_sharing_store(struct kobject *kobj,
  				      struct kobj_attribute *attr,
  				      const char *buf, size_t count)
  {
  	int limit;
  	int ret;

  	ret = kstrtoint(buf, 10, &limit);
  	if (ret)
  		return ret;

  	/*
  	 * When a KSM page is created it is shared by 2 mappings. This
  	 * being a signed comparison, it implicitly verifies it's not
  	 * negative.
  	 */
  	if (limit < 2)
  		return -EINVAL;

  	/* Unlocked fast path: nothing to do if the value is unchanged. */
  	if (READ_ONCE(ksm_max_page_sharing) == limit)
  		return count;

  	mutex_lock(&ksm_thread_mutex);
  	wait_while_offlining();
  	if (ksm_max_page_sharing != limit) {
  		if (ksm_pages_shared || remove_all_stable_nodes())
  			ret = -EBUSY;
  		else
  			ksm_max_page_sharing = limit;
  	}
  	mutex_unlock(&ksm_thread_mutex);

  	if (ret)
  		return ret;
  	return count;
  }
  KSM_ATTR(max_page_sharing);
b40282603   Hugh Dickins   ksm: rename kerne...
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
  static ssize_t pages_shared_show(struct kobject *kobj,
  				 struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_shared);
  }
  KSM_ATTR_RO(pages_shared);
  
  static ssize_t pages_sharing_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
  {
e178dfde3   Hugh Dickins   ksm: move pages_s...
2976
2977
  	return sprintf(buf, "%lu
  ", ksm_pages_sharing);
b40282603   Hugh Dickins   ksm: rename kerne...
2978
2979
  }
  KSM_ATTR_RO(pages_sharing);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
  static ssize_t pages_unshared_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_pages_unshared);
  }
  KSM_ATTR_RO(pages_unshared);
  
  /*
   * sysfs show: print the derived "volatile pages" figure, computed as
   * ksm_rmap_items minus the shared, sharing and unshared counters and
   * clamped at zero.  Returns the number of bytes written.
   */
  static ssize_t pages_volatile_show(struct kobject *kobj,
  				   struct kobj_attribute *attr, char *buf)
  {
  	long ksm_pages_volatile;

  	ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared
  				- ksm_pages_sharing - ksm_pages_unshared;
  	/*
  	 * It was not worth any locking to calculate that statistic,
  	 * but it might therefore sometimes be negative: conceal that.
  	 */
  	if (ksm_pages_volatile < 0)
  		ksm_pages_volatile = 0;
  	return sprintf(buf, "%ld\n", ksm_pages_volatile);
  }
  KSM_ATTR_RO(pages_volatile);
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
  static ssize_t stable_node_dups_show(struct kobject *kobj,
  				     struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_stable_node_dups);
  }
  KSM_ATTR_RO(stable_node_dups);
  
  static ssize_t stable_node_chains_show(struct kobject *kobj,
  				       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_stable_node_chains);
  }
  KSM_ATTR_RO(stable_node_chains);
  
  static ssize_t
  stable_node_chains_prune_millisecs_show(struct kobject *kobj,
  					struct kobj_attribute *attr,
  					char *buf)
  {
  	return sprintf(buf, "%u
  ", ksm_stable_node_chains_prune_millisecs);
  }
  
  /*
   * sysfs store: parse @buf as a base-10 unsigned value and install it as
   * ksm_stable_node_chains_prune_millisecs.
   *
   * Returns @count on success, or -EINVAL if the text does not parse or the
   * value exceeds UINT_MAX.
   */
  static ssize_t
  stable_node_chains_prune_millisecs_store(struct kobject *kobj,
  					 struct kobj_attribute *attr,
  					 const char *buf, size_t count)
  {
  	unsigned long msecs;

  	if (kstrtoul(buf, 10, &msecs))
  		return -EINVAL;
  	if (msecs > UINT_MAX)
  		return -EINVAL;

  	ksm_stable_node_chains_prune_millisecs = msecs;
  	return count;
  }
  KSM_ATTR(stable_node_chains_prune_millisecs);
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
3047
3048
3049
3050
3051
3052
3053
  static ssize_t full_scans_show(struct kobject *kobj,
  			       struct kobj_attribute *attr, char *buf)
  {
  	return sprintf(buf, "%lu
  ", ksm_scan.seqnr);
  }
  KSM_ATTR_RO(full_scans);
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
3054
3055
3056
3057
  static struct attribute *ksm_attrs[] = {
  	&sleep_millisecs_attr.attr,
  	&pages_to_scan_attr.attr,
  	&run_attr.attr,
b40282603   Hugh Dickins   ksm: rename kerne...
3058
3059
  	&pages_shared_attr.attr,
  	&pages_sharing_attr.attr,
473b0ce4d   Hugh Dickins   ksm: pages_unshar...
3060
3061
3062
  	&pages_unshared_attr.attr,
  	&pages_volatile_attr.attr,
  	&full_scans_attr.attr,
90bd6fd31   Petr Holasek   ksm: allow trees ...
3063
3064
3065
  #ifdef CONFIG_NUMA
  	&merge_across_nodes_attr.attr,
  #endif
2c653d0ee   Andrea Arcangeli   ksm: introduce ks...
3066
3067
3068
3069
  	&max_page_sharing_attr.attr,
  	&stable_node_chains_attr.attr,
  	&stable_node_dups_attr.attr,
  	&stable_node_chains_prune_millisecs_attr.attr,
e86c59b1b   Claudio Imbrenda   mm/ksm: improve d...
3070
  	&use_zero_pages_attr.attr,
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
3071
3072
  	NULL,
  };
f907c26a9   Arvind Yadav   mm/ksm.c: constif...
3073
  static const struct attribute_group ksm_attr_group = {
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
3074
3075
3076
  	.attrs = ksm_attrs,
  	.name = "ksm",
  };
2ffd8679c   Hugh Dickins   ksm: sysfs and de...
3077
  #endif /* CONFIG_SYSFS */
31dbd01f3   Izik Eidus   ksm: Kernel SameP...
3078
3079
3080
3081
3082
  
  /*
   * Initialise KSM at subsys_initcall time.
   *
   * Computes the zero-page checksum, sets up the slab caches, starts the
   * "ksmd" kernel thread and, when CONFIG_SYSFS is enabled, registers the
   * "ksm" attribute group on mm_kobj.  Without sysfs, merging is switched
   * on immediately since nothing else could enable it.
   *
   * Returns 0 on success or a negative errno; on failure after
   * ksm_slab_init() succeeded, ksm_slab_free() is called before returning.
   */
  static int __init ksm_init(void)
  {
  	struct task_struct *ksm_thread;
  	int err;
  
  	/* The correct value depends on page size and endianness */
  	zero_checksum = calc_checksum(ZERO_PAGE(0));
  	/* Default to false for backwards compatibility */
  	ksm_use_zero_pages = false;
  
  	err = ksm_slab_init();
  	if (err)
  		goto out;
  
  	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
  	if (IS_ERR(ksm_thread)) {
  		pr_err("ksm: creating kthread failed\n");
  		err = PTR_ERR(ksm_thread);
  		goto out_free;
  	}
  
  #ifdef CONFIG_SYSFS
  	err = sysfs_create_group(mm_kobj, &ksm_attr_group);
  	if (err) {
  		pr_err("ksm: register sysfs failed\n");
  		/* The thread is already running: stop it before unwinding. */
  		kthread_stop(ksm_thread);
  		goto out_free;
  	}
  #else
  	ksm_run = KSM_RUN_MERGE;	/* no way for user to start it */
  
  #endif /* CONFIG_SYSFS */
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  	/* There is no significance to this priority 100 */
  	hotplug_memory_notifier(ksm_memory_callback, 100);
  #endif
  	return 0;
  
  out_free:
  	ksm_slab_free();
  out:
  	return err;
  }
  subsys_initcall(ksm_init);