Blame view

mm/mmu_notifier.c 9.3 KB
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
1
2
3
4
5
6
7
8
9
10
11
12
13
  /*
   *  linux/mm/mmu_notifier.c
   *
   *  Copyright (C) 2008  Qumranet, Inc.
   *  Copyright (C) 2008  SGI
   *             Christoph Lameter <clameter@sgi.com>
   *
   *  This work is licensed under the terms of the GNU GPL, version 2. See
   *  the COPYING file in the top-level directory.
   */
  
  #include <linux/rculist.h>
  #include <linux/mmu_notifier.h>
b95f1b31b   Paul Gortmaker   mm: Map most file...
14
  #include <linux/export.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
15
16
  #include <linux/mm.h>
  #include <linux/err.h>
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
17
  #include <linux/srcu.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
18
19
  #include <linux/rcupdate.h>
  #include <linux/sched.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
20
  #include <linux/slab.h>
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
21

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
22
  /* global SRCU for all MMs */
70400303c   Andrea Arcangeli   mm: mmu_notifier:...
23
  static struct srcu_struct srcu;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
24

cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
25
26
27
28
29
30
31
  /*
   * This function can't run concurrently against mmu_notifier_register
   * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
   * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
   * in parallel despite there being no task using this mm any more,
   * through the vmas outside of the exit_mmap context, such as with
   * vmtruncate. This serializes against mmu_notifier_unregister with
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
32
33
   * the mmu_notifier_mm->lock in addition to SRCU and it serializes
   * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
34
35
36
37
38
39
   * can't go away from under us as exit_mmap holds an mm_count pin
   * itself.
   */
  void __mmu_notifier_release(struct mm_struct *mm)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
40
  	int id;
3ad3d901b   Xiao Guangrong   mm: mmu_notifier:...
41
42
  
  	/*
751efd861   Robin Holt   mmu_notifier_unre...
43
44
45
46
  	 * srcu_read_lock() here will block synchronize_srcu() in
  	 * mmu_notifier_unregister() until all registered
  	 * ->release() callouts this function makes have
  	 * returned.
3ad3d901b   Xiao Guangrong   mm: mmu_notifier:...
47
  	 */
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
48
  	id = srcu_read_lock(&srcu);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
49
50
51
52
53
  	spin_lock(&mm->mmu_notifier_mm->lock);
  	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
  		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
  				 struct mmu_notifier,
  				 hlist);
751efd861   Robin Holt   mmu_notifier_unre...
54

cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
55
  		/*
751efd861   Robin Holt   mmu_notifier_unre...
56
57
  		 * Unlink.  This will prevent mmu_notifier_unregister()
  		 * from also making the ->release() callout.
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
58
59
  		 */
  		hlist_del_init_rcu(&mn->hlist);
751efd861   Robin Holt   mmu_notifier_unre...
60
61
62
63
64
65
66
67
68
  		spin_unlock(&mm->mmu_notifier_mm->lock);
  
  		/*
  		 * Clear sptes. (see 'release' description in mmu_notifier.h)
  		 */
  		if (mn->ops->release)
  			mn->ops->release(mn, mm);
  
  		spin_lock(&mm->mmu_notifier_mm->lock);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
69
70
71
72
  	}
  	spin_unlock(&mm->mmu_notifier_mm->lock);
  
  	/*
751efd861   Robin Holt   mmu_notifier_unre...
73
74
75
76
77
78
79
80
81
82
83
84
  	 * All callouts to ->release() which we have done are complete.
  	 * Allow synchronize_srcu() in mmu_notifier_unregister() to complete
  	 */
  	srcu_read_unlock(&srcu, id);
  
  	/*
  	 * mmu_notifier_unregister() may have unlinked a notifier and may
  	 * still be calling out to it.	Additionally, other notifiers
  	 * may have been active via vmtruncate() et. al. Block here
  	 * to ensure that all notifier callouts for this mm have been
  	 * completed and the sptes are really cleaned up before returning
  	 * to exit_mmap().
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
85
  	 */
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
86
  	synchronize_srcu(&srcu);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
87
88
89
90
91
92
93
94
95
96
97
  }
  
  /*
   * If no young bitflag is supported by the hardware, ->clear_flush_young can
   * unmap the address and return 1 or 0 depending if the mapping previously
   * existed or not.
   */
  int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
  					unsigned long address)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
98
  	int young = 0, id;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
99

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
100
  	id = srcu_read_lock(&srcu);
b67bfe0d4   Sasha Levin   hlist: drop the n...
101
  	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
102
103
104
  		if (mn->ops->clear_flush_young)
  			young |= mn->ops->clear_flush_young(mn, mm, address);
  	}
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
105
  	srcu_read_unlock(&srcu, id);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
106
107
108
  
  	return young;
  }
8ee53820e   Andrea Arcangeli   thp: mmu_notifier...
109
110
111
112
  int __mmu_notifier_test_young(struct mm_struct *mm,
  			      unsigned long address)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
113
  	int young = 0, id;
8ee53820e   Andrea Arcangeli   thp: mmu_notifier...
114

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
115
  	id = srcu_read_lock(&srcu);
b67bfe0d4   Sasha Levin   hlist: drop the n...
116
  	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
8ee53820e   Andrea Arcangeli   thp: mmu_notifier...
117
118
119
120
121
122
  		if (mn->ops->test_young) {
  			young = mn->ops->test_young(mn, mm, address);
  			if (young)
  				break;
  		}
  	}
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
123
  	srcu_read_unlock(&srcu, id);
8ee53820e   Andrea Arcangeli   thp: mmu_notifier...
124
125
126
  
  	return young;
  }
828502d30   Izik Eidus   ksm: add mmu_noti...
127
128
129
130
  void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
  			       pte_t pte)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
131
  	int id;
828502d30   Izik Eidus   ksm: add mmu_noti...
132

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
133
  	id = srcu_read_lock(&srcu);
b67bfe0d4   Sasha Levin   hlist: drop the n...
134
  	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
828502d30   Izik Eidus   ksm: add mmu_noti...
135
136
  		if (mn->ops->change_pte)
  			mn->ops->change_pte(mn, mm, address, pte);
828502d30   Izik Eidus   ksm: add mmu_noti...
137
  	}
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
138
  	srcu_read_unlock(&srcu, id);
828502d30   Izik Eidus   ksm: add mmu_noti...
139
  }
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
140
141
142
143
  void __mmu_notifier_invalidate_page(struct mm_struct *mm,
  					  unsigned long address)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
144
  	int id;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
145

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
146
  	id = srcu_read_lock(&srcu);
b67bfe0d4   Sasha Levin   hlist: drop the n...
147
  	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
148
149
150
  		if (mn->ops->invalidate_page)
  			mn->ops->invalidate_page(mn, mm, address);
  	}
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
151
  	srcu_read_unlock(&srcu, id);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
152
153
154
155
156
157
  }
  
  void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
  				  unsigned long start, unsigned long end)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
158
  	int id;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
159

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
160
  	id = srcu_read_lock(&srcu);
b67bfe0d4   Sasha Levin   hlist: drop the n...
161
  	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
162
163
164
  		if (mn->ops->invalidate_range_start)
  			mn->ops->invalidate_range_start(mn, mm, start, end);
  	}
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
165
  	srcu_read_unlock(&srcu, id);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
166
  }
fa794199e   Cliff Wickman   mm: export mmu no...
167
  EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
168
169
170
171
172
  
  void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
  				  unsigned long start, unsigned long end)
  {
  	struct mmu_notifier *mn;
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
173
  	int id;
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
174

21a92735f   Sagi Grimberg   mm: mmu_notifier:...
175
  	id = srcu_read_lock(&srcu);
b67bfe0d4   Sasha Levin   hlist: drop the n...
176
  	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
177
178
179
  		if (mn->ops->invalidate_range_end)
  			mn->ops->invalidate_range_end(mn, mm, start, end);
  	}
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
180
  	srcu_read_unlock(&srcu, id);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
181
  }
fa794199e   Cliff Wickman   mm: export mmu no...
182
  EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
183
184
185
186
187
188
189
190
191
  
/*
 * Common implementation of mmu_notifier_register() and
 * __mmu_notifier_register(): allocate mm->mmu_notifier_mm on first use
 * and link @mn into the mm's notifier list under all the mm locks.
 *
 * @take_mmap_sem: non-zero to take mmap_sem for write here; zero when the
 *	caller already holds it (see __mmu_notifier_register()).
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error from
 * mm_take_all_locks().
 */
static int do_mmu_notifier_register(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    int take_mmap_sem)
{
	struct mmu_notifier_mm *mmu_notifier_mm;
	int ret;

	/* Callers must hold an mm_users reference (see the comment above
	 * mmu_notifier_register()). */
	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	/*
	 * Verify that mmu_notifier_init() already run and the global srcu is
	 * initialized.
	 */
	BUG_ON(!srcu.per_cpu_ref);

	/*
	 * Allocate up front, before any locks are held; freed at out_clean
	 * unless ownership was transferred to mm->mmu_notifier_mm below.
	 */
	ret = -ENOMEM;
	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
	if (unlikely(!mmu_notifier_mm))
		goto out;

	if (take_mmap_sem)
		down_write(&mm->mmap_sem);
	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_clean;

	if (!mm_has_notifiers(mm)) {
		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
		spin_lock_init(&mmu_notifier_mm->lock);

		mm->mmu_notifier_mm = mmu_notifier_mm;
		/* Ownership transferred; the kfree below becomes a no-op. */
		mmu_notifier_mm = NULL;
	}
	/* Pin mm_count so mmu_notifier_unregister() can run safely even
	 * after exit_mmap; it is dropped there via mmdrop(). */
	atomic_inc(&mm->mm_count);

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either
	 * thanks to mm_take_all_locks().
	 */
	spin_lock(&mm->mmu_notifier_mm->lock);
	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
	spin_unlock(&mm->mmu_notifier_mm->lock);

	mm_drop_all_locks(mm);
out_clean:
	if (take_mmap_sem)
		up_write(&mm->mmap_sem);
	/* NULL if the allocation was consumed above; kfree(NULL) is a no-op. */
	kfree(mmu_notifier_mm);
out:
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return ret;
}
  
  /*
   * Must not hold mmap_sem nor any other VM related lock when calling
   * this registration function. Must also ensure mm_users can't go down
   * to zero while this runs to avoid races with mmu_notifier_release,
   * so mm has to be current->mm or the mm should be pinned safely such
   * as with get_task_mm(). If the mm is not current->mm, the mm_users
   * pin should be released by calling mmput after mmu_notifier_register
   * returns. mmu_notifier_unregister must be always called to
   * unregister the notifier. mm_count is automatically pinned to allow
   * mmu_notifier_unregister to safely run at any time later, before or
   * after exit_mmap. ->release will always be called before exit_mmap
   * frees the pages.
   */
  int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
  {
  	return do_mmu_notifier_register(mn, mm, 1);
  }
  EXPORT_SYMBOL_GPL(mmu_notifier_register);
  
  /*
   * Same as mmu_notifier_register but here the caller must hold the
   * mmap_sem in write mode.
   */
  int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
  {
  	return do_mmu_notifier_register(mn, mm, 0);
  }
  EXPORT_SYMBOL_GPL(__mmu_notifier_register);
  
/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	/* Every notifier must have been unregistered before teardown. */
	BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
	kfree(mm->mmu_notifier_mm);
	/* Poison the pointer so any later dereference faults loudly. */
	mm->mmu_notifier_mm = LIST_POISON1; /* debug */
}
  
  /*
   * This releases the mm_count pin automatically and frees the mm
   * structure if it was the last user of it. It serializes against
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
278
279
   * running mmu notifiers with SRCU and against mmu_notifier_unregister
   * with the unregister lock + SRCU. All sptes must be dropped before
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
280
281
282
283
284
285
286
287
   * calling mmu_notifier_unregister. ->release or any other notifier
   * method may be invoked concurrently with mmu_notifier_unregister,
   * and only after mmu_notifier_unregister returned we're guaranteed
   * that ->release or any other method can't run anymore.
   */
  void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
  {
  	BUG_ON(atomic_read(&mm->mm_count) <= 0);
751efd861   Robin Holt   mmu_notifier_unre...
288
  	spin_lock(&mm->mmu_notifier_mm->lock);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
289
  	if (!hlist_unhashed(&mn->hlist)) {
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
290
  		int id;
3ad3d901b   Xiao Guangrong   mm: mmu_notifier:...
291

cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
292
  		/*
751efd861   Robin Holt   mmu_notifier_unre...
293
  		 * Ensure we synchronize up with __mmu_notifier_release().
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
294
  		 */
751efd861   Robin Holt   mmu_notifier_unre...
295
296
297
298
  		id = srcu_read_lock(&srcu);
  
  		hlist_del_rcu(&mn->hlist);
  		spin_unlock(&mm->mmu_notifier_mm->lock);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
299
300
  		if (mn->ops->release)
  			mn->ops->release(mn, mm);
3ad3d901b   Xiao Guangrong   mm: mmu_notifier:...
301

751efd861   Robin Holt   mmu_notifier_unre...
302
303
304
305
306
  		/*
  		 * Allow __mmu_notifier_release() to complete.
  		 */
  		srcu_read_unlock(&srcu, id);
  	} else
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
307
308
309
  		spin_unlock(&mm->mmu_notifier_mm->lock);
  
  	/*
751efd861   Robin Holt   mmu_notifier_unre...
310
311
  	 * Wait for any running method to finish, including ->release() if it
  	 * was run by __mmu_notifier_release() instead of us.
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
312
  	 */
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
313
  	synchronize_srcu(&srcu);
cddb8a5c1   Andrea Arcangeli   mmu-notifiers: core
314
315
316
317
318
319
  
  	BUG_ON(atomic_read(&mm->mm_count) <= 0);
  
  	mmdrop(mm);
  }
  EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
21a92735f   Sagi Grimberg   mm: mmu_notifier:...
320
321
322
323
324
325
326
  
/* Initialize the single global SRCU domain shared by all mms above. */
static int __init mmu_notifier_init(void)
{
	return init_srcu_struct(&srcu);
}

module_init(mmu_notifier_init);