// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/mmu_notifier.c
 *
 *  Copyright (C) 2008  Qumranet, Inc.
 *  Copyright (C) 2008  SGI
 *             Christoph Lameter <cl@linux.com>
 */

#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/interval_tree.h>
#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>

/* global SRCU for all MMs */
DEFINE_STATIC_SRCU(srcu);

#ifdef CONFIG_LOCKDEP
struct lockdep_map __mmu_notifier_invalidate_range_start_map = {
	.name = "mmu_notifier_invalidate_range_start"
};
#endif

/*
 * The mmu_notifier_subscriptions structure is allocated and installed in
 * mm->notifier_subscriptions inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_subscriptions {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	bool has_itree;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
	unsigned long invalidate_seq;
	unsigned long active_invalidate_ranges;
	struct rb_root_cached itree;
	wait_queue_head_t wq;
	struct hlist_head deferred_list;
};

/*
 * This is a collision-retry read-side/write-side 'lock', a lot like a
 * seqcount, however this allows multiple write-sides to hold it at
 * once. Conceptually the write side is protecting the values of the PTEs in
 * this mm, such that PTEs cannot be read into SPTEs (shadow PTEs) while any
 * writer exists.
 *
 * Note that the core mm creates nested invalidate_range_start()/end() regions
 * within the same thread, and runs invalidate_range_start()/end() in parallel
 * on multiple CPUs. This is designed to not reduce concurrency or block
 * progress on the mm side.
 *
 * As a secondary function, holding the full write side also serves to prevent
 * writers for the itree, this is an optimization to avoid extra locking
 * during invalidate_range_start/end notifiers.
 *
 * The write side has two states, fully excluded:
 *  - mm->active_invalidate_ranges != 0
 *  - subscriptions->invalidate_seq & 1 == True (odd)
 *  - some range on the mm_struct is being invalidated
 *  - the itree is not allowed to change
 *
 * And partially excluded:
 *  - mm->active_invalidate_ranges != 0
 *  - subscriptions->invalidate_seq & 1 == False (even)
 *  - some range on the mm_struct is being invalidated
 *  - the itree is allowed to change
 *
 * Operations on notifier_subscriptions->invalidate_seq (under spinlock):
 *    seq |= 1  # Begin writing
 *    seq++     # Release the writing state
 *    seq & 1   # True if a writer exists
 *
 * The latter state avoids some expensive work on inv_end in the common case of
 * no mmu_interval_notifier monitoring the VA.
 */

static bool
mn_itree_is_invalidating(struct mmu_notifier_subscriptions *subscriptions)
{
	lockdep_assert_held(&subscriptions->lock);
	return subscriptions->invalidate_seq & 1;
}

static struct mmu_interval_notifier *
mn_itree_inv_start_range(struct mmu_notifier_subscriptions *subscriptions,
			 const struct mmu_notifier_range *range,
			 unsigned long *seq)
{
	struct interval_tree_node *node;
	struct mmu_interval_notifier *res = NULL;

	spin_lock(&subscriptions->lock);
	subscriptions->active_invalidate_ranges++;
	node = interval_tree_iter_first(&subscriptions->itree, range->start,
					range->end - 1);
	if (node) {
		subscriptions->invalidate_seq |= 1;
		res = container_of(node, struct mmu_interval_notifier,
				   interval_tree);
	}

	*seq = subscriptions->invalidate_seq;
	spin_unlock(&subscriptions->lock);
	return res;
}

static struct mmu_interval_notifier *
mn_itree_inv_next(struct mmu_interval_notifier *interval_sub,
		  const struct mmu_notifier_range *range)
{
	struct interval_tree_node *node;

	node = interval_tree_iter_next(&interval_sub->interval_tree,
				       range->start, range->end - 1);
	if (!node)
		return NULL;
	return container_of(node, struct mmu_interval_notifier, interval_tree);
}

static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions)
{
	struct mmu_interval_notifier *interval_sub;
	struct hlist_node *next;

	spin_lock(&subscriptions->lock);
	if (--subscriptions->active_invalidate_ranges ||
	    !mn_itree_is_invalidating(subscriptions)) {
		spin_unlock(&subscriptions->lock);
		return;
	}

	/* Make invalidate_seq even */
	subscriptions->invalidate_seq++;

	/*
	 * The inv_end incorporates a deferred mechanism like rtnl_unlock().
	 * Adds and removes are queued until the final inv_end happens then
	 * they are progressed. This arrangement for tree updates is used to
	 * avoid using a blocking lock during invalidate_range_start.
	 */
	hlist_for_each_entry_safe(interval_sub, next,
				  &subscriptions->deferred_list,
				  deferred_item) {
		if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb))
			interval_tree_insert(&interval_sub->interval_tree,
					     &subscriptions->itree);
		else
			interval_tree_remove(&interval_sub->interval_tree,
					     &subscriptions->itree);
		hlist_del(&interval_sub->deferred_item);
	}
	spin_unlock(&subscriptions->lock);

	wake_up_all(&subscriptions->wq);
}

/**
 * mmu_interval_read_begin - Begin a read side critical section against a VA
 *                           range
 * @interval_sub: The interval subscription
 *
 * mmu_interval_read_begin()/mmu_interval_read_retry() implement a
 * collision-retry scheme similar to seqcount for the VA range under
 * subscription. If the mm invokes invalidation during the critical section
 * then mmu_interval_read_retry() will return true.
 *
 * This is useful to obtain shadow PTEs where teardown or setup of the SPTEs
 * require a blocking context. The critical region formed by this can sleep,
 * and the required 'user_lock' can also be a sleeping lock.
 *
 * The caller is required to provide a 'user_lock' to serialize both teardown
 * and setup.
 *
 * The return value should be passed to mmu_interval_read_retry().
 */
unsigned long
mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
{
	struct mmu_notifier_subscriptions *subscriptions =
		interval_sub->mm->notifier_subscriptions;
	unsigned long seq;
	bool is_invalidating;

	/*
	 * If the subscription has a different seq value under the user_lock
	 * than we started with then it has collided.
	 *
	 * If the subscription currently has the same seq value as the
	 * subscriptions seq, then it is currently between
	 * invalidate_start/end and is colliding.
	 *
	 * The locking looks broadly like this:
	 *   mn_tree_invalidate_start():          mmu_interval_read_begin():
	 *                                        spin_lock
	 *                                         seq = READ_ONCE(interval_sub->invalidate_seq);
	 *                                         seq == subs->invalidate_seq
	 *                                        spin_unlock
	 *    spin_lock
	 *     seq = ++subscriptions->invalidate_seq
	 *    spin_unlock
	 *     op->invalidate_range():
	 *       user_lock
	 *        mmu_interval_set_seq()
	 *         interval_sub->invalidate_seq = seq
	 *       user_unlock
	 *
	 *                          [Required: mmu_interval_read_retry() == true]
	 *
	 *   mn_itree_inv_end():
	 *    spin_lock
	 *     seq = ++subscriptions->invalidate_seq
	 *    spin_unlock
	 *
	 *                                        user_lock
	 *                                         mmu_interval_read_retry():
	 *                                          interval_sub->invalidate_seq != seq
	 *                                        user_unlock
	 *
	 * Barriers are not needed here as any races here are closed by an
	 * eventual mmu_interval_read_retry(), which provides a barrier via the
	 * user_lock.
	 */
	spin_lock(&subscriptions->lock);
	/* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */
	seq = READ_ONCE(interval_sub->invalidate_seq);
	is_invalidating = seq == subscriptions->invalidate_seq;
	spin_unlock(&subscriptions->lock);

	/*
	 * interval_sub->invalidate_seq must always be set to an odd value via
	 * mmu_interval_set_seq() using the provided cur_seq from
	 * mn_itree_inv_start_range(). This ensures that if seq does wrap we
	 * will always clear the below sleep in some reasonable time as
	 * subscriptions->invalidate_seq is even in the idle state.
	 */
	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
	if (is_invalidating)
		wait_event(subscriptions->wq,
			   READ_ONCE(subscriptions->invalidate_seq) != seq);

	/*
	 * Notice that mmu_interval_read_retry() can already be true at this
	 * point, avoiding loops here allows the caller to provide a global
	 * time bound.
	 */

	return seq;
}
EXPORT_SYMBOL_GPL(mmu_interval_read_begin);
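
/*
 * A minimal caller-side sketch of the collision-retry scheme above, adapted
 * from the mmu_interval_read_retry() kerneldoc. driver_lock, mirror_range()
 * and program_sptes() are hypothetical driver pieces; only
 * mmu_interval_read_begin()/mmu_interval_read_retry() are the real APIs:
 *
 *	unsigned long seq;
 *
 * again:
 *	seq = mmu_interval_read_begin(interval_sub);
 *	mirror_range(mm, range);	// read PTEs, may sleep, no lock held
 *	mutex_lock(&driver_lock);
 *	if (mmu_interval_read_retry(interval_sub, seq)) {
 *		mutex_unlock(&driver_lock);
 *		goto again;		// an invalidation collided
 *	}
 *	program_sptes();		// seq is still current under driver_lock
 *	mutex_unlock(&driver_lock);
 */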

static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions,
			     struct mm_struct *mm)
{
	struct mmu_notifier_range range = {
		.flags = MMU_NOTIFIER_RANGE_BLOCKABLE,
		.event = MMU_NOTIFY_RELEASE,
		.mm = mm,
		.start = 0,
		.end = ULONG_MAX,
	};
	struct mmu_interval_notifier *interval_sub;
	unsigned long cur_seq;
	bool ret;

	for (interval_sub =
		     mn_itree_inv_start_range(subscriptions, &range, &cur_seq);
	     interval_sub;
	     interval_sub = mn_itree_inv_next(interval_sub, &range)) {
		ret = interval_sub->ops->invalidate(interval_sub, &range,
						    cur_seq);
		WARN_ON(!ret);
	}

	mn_itree_inv_end(subscriptions);
}

/*
 * This function can't run concurrently against mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the notifier_subscriptions->lock in addition to SRCU and it serializes
 * against the other mmu notifiers with SRCU. struct mmu_notifier_subscriptions
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions,
			     struct mm_struct *mm)
{
	struct mmu_notifier *subscription;
	int id;

	/*
	 * SRCU here will block mmu_notifier_unregister until
	 * ->release returns.
	 */
	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu))
		/*
		 * If ->release runs before mmu_notifier_unregister it must be
		 * handled, as it's the only way for the driver to flush all
		 * existing sptes and stop the driver from establishing any more
		 * sptes before all the pages in the mm are freed.
		 */
		if (subscription->ops->release)
			subscription->ops->release(subscription, mm);

	spin_lock(&subscriptions->lock);
	while (unlikely(!hlist_empty(&subscriptions->list))) {
		subscription = hlist_entry(subscriptions->list.first,
					   struct mmu_notifier, hlist);
		/*
		 * We arrived before mmu_notifier_unregister so
		 * mmu_notifier_unregister will do nothing other than to wait
		 * for ->release to finish and for mmu_notifier_unregister to
		 * return.
		 */
		hlist_del_init_rcu(&subscription->hlist);
	}
	spin_unlock(&subscriptions->lock);
	srcu_read_unlock(&srcu, id);

	/*
	 * synchronize_srcu here prevents mmu_notifier_release from returning to
	 * exit_mmap (which would proceed with freeing all pages in the mm)
	 * until the ->release method returns, if it was invoked by
	 * mmu_notifier_unregister.
	 *
	 * The notifier_subscriptions can't go away from under us because
	 * one mm_count is held by exit_mmap.
	 */
	synchronize_srcu(&srcu);
}

void __mmu_notifier_release(struct mm_struct *mm)
{
	struct mmu_notifier_subscriptions *subscriptions =
		mm->notifier_subscriptions;

	if (subscriptions->has_itree)
		mn_itree_release(subscriptions, mm);

	if (!hlist_empty(&subscriptions->list))
		mn_hlist_release(subscriptions, mm);
}

/*
 * If no young bitflag is supported by the hardware, ->clear_flush_young can
 * unmap the address and return 1 or 0 depending on whether the mapping
 * previously existed or not.
 */
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
				     unsigned long start,
				     unsigned long end)
{
	struct mmu_notifier *subscription;
	int young = 0, id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		if (subscription->ops->clear_flush_young)
			young |= subscription->ops->clear_flush_young(
				subscription, mm, start, end);
	}
	srcu_read_unlock(&srcu, id);

	return young;
}

int __mmu_notifier_clear_young(struct mm_struct *mm,
			       unsigned long start,
			       unsigned long end)
{
	struct mmu_notifier *subscription;
	int young = 0, id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		if (subscription->ops->clear_young)
			young |= subscription->ops->clear_young(subscription,
								mm, start, end);
	}
	srcu_read_unlock(&srcu, id);

	return young;
}

int __mmu_notifier_test_young(struct mm_struct *mm,
			      unsigned long address)
{
	struct mmu_notifier *subscription;
	int young = 0, id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		if (subscription->ops->test_young) {
			young = subscription->ops->test_young(subscription, mm,
							      address);
			if (young)
				break;
		}
	}
	srcu_read_unlock(&srcu, id);

	return young;
}

void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
			       pte_t pte)
{
	struct mmu_notifier *subscription;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		if (subscription->ops->change_pte)
			subscription->ops->change_pte(subscription, mm, address,
						      pte);
	}
	srcu_read_unlock(&srcu, id);
}

static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
			       const struct mmu_notifier_range *range)
{
	struct mmu_interval_notifier *interval_sub;
	unsigned long cur_seq;

	for (interval_sub =
		     mn_itree_inv_start_range(subscriptions, range, &cur_seq);
	     interval_sub;
	     interval_sub = mn_itree_inv_next(interval_sub, range)) {
		bool ret;

		ret = interval_sub->ops->invalidate(interval_sub, range,
						    cur_seq);
		if (!ret) {
			if (WARN_ON(mmu_notifier_range_blockable(range)))
				continue;
			goto out_would_block;
		}
	}
	return 0;

out_would_block:
	/*
	 * On -EAGAIN the non-blocking caller is not allowed to call
	 * invalidate_range_end()
	 */
	mn_itree_inv_end(subscriptions);
	return -EAGAIN;
}

static int mn_hlist_invalidate_range_start(
	struct mmu_notifier_subscriptions *subscriptions,
	struct mmu_notifier_range *range)
{
	struct mmu_notifier *subscription;
	int ret = 0;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		const struct mmu_notifier_ops *ops = subscription->ops;

		if (ops->invalidate_range_start) {
			int _ret;

			if (!mmu_notifier_range_blockable(range))
				non_block_start();
			_ret = ops->invalidate_range_start(subscription, range);
			if (!mmu_notifier_range_blockable(range))
				non_block_end();
			if (_ret) {
				pr_info("%pS callback failed with %d in %sblockable context.\n",
					ops->invalidate_range_start, _ret,
					!mmu_notifier_range_blockable(range) ?
						"non-" : "");
				WARN_ON(mmu_notifier_range_blockable(range) ||
					_ret != -EAGAIN);
				ret = _ret;
			}
		}
	}
	srcu_read_unlock(&srcu, id);

	return ret;
}

int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
	struct mmu_notifier_subscriptions *subscriptions =
		range->mm->notifier_subscriptions;
	int ret;

	if (subscriptions->has_itree) {
		ret = mn_itree_invalidate(subscriptions, range);
		if (ret)
			return ret;
	}
	if (!hlist_empty(&subscriptions->list))
		return mn_hlist_invalidate_range_start(subscriptions, range);
	return 0;
}

static void
mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions,
			struct mmu_notifier_range *range, bool only_end)
{
	struct mmu_notifier *subscription;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		/*
		 * Call invalidate_range here too to avoid the need for the
		 * subsystem of having to register an invalidate_range_end
		 * call-back when there is invalidate_range already. Usually a
		 * subsystem registers either invalidate_range_start()/end() or
		 * invalidate_range(), so this will be no additional overhead
		 * (besides the pointer check).
		 *
		 * We skip the call to invalidate_range() if we know it is safe,
		 * ie. the call site used mmu_notifier_invalidate_range_only_end()
		 * which is safe to do when we know that a call to
		 * invalidate_range() already happened under the page table lock.
		 */
		if (!only_end && subscription->ops->invalidate_range)
			subscription->ops->invalidate_range(subscription,
							    range->mm,
							    range->start,
							    range->end);
		if (subscription->ops->invalidate_range_end) {
			if (!mmu_notifier_range_blockable(range))
				non_block_start();
			subscription->ops->invalidate_range_end(subscription,
								range);
			if (!mmu_notifier_range_blockable(range))
				non_block_end();
		}
	}
	srcu_read_unlock(&srcu, id);
}

void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
					 bool only_end)
{
	struct mmu_notifier_subscriptions *subscriptions =
		range->mm->notifier_subscriptions;

	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	if (subscriptions->has_itree)
		mn_itree_inv_end(subscriptions);

	if (!hlist_empty(&subscriptions->list))
		mn_hlist_invalidate_end(subscriptions, range, only_end);
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}

void __mmu_notifier_invalidate_range(struct mm_struct *mm,
				     unsigned long start, unsigned long end)
{
	struct mmu_notifier *subscription;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		if (subscription->ops->invalidate_range)
			subscription->ops->invalidate_range(subscription, mm,
							    start, end);
	}
	srcu_read_unlock(&srcu, id);
}

/*
 * Same as mmu_notifier_register but here the caller must hold the mmap_lock in
 * write mode. A NULL mn signals the notifier is being registered for itree
 * mode.
 */
int __mmu_notifier_register(struct mmu_notifier *subscription,
			    struct mm_struct *mm)
{
	struct mmu_notifier_subscriptions *subscriptions = NULL;
	int ret;

	mmap_assert_write_locked(mm);
	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
		lock_map_release(&__mmu_notifier_invalidate_range_start_map);
		fs_reclaim_release(GFP_KERNEL);
	}

	if (!mm->notifier_subscriptions) {
		/*
		 * kmalloc cannot be called under mm_take_all_locks(), but we
		 * know that mm->notifier_subscriptions can't change while we
		 * hold the write side of the mmap_lock.
		 */
		subscriptions = kzalloc(
			sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL);
		if (!subscriptions)
			return -ENOMEM;

		INIT_HLIST_HEAD(&subscriptions->list);
		spin_lock_init(&subscriptions->lock);
		subscriptions->invalidate_seq = 2;
		subscriptions->itree = RB_ROOT_CACHED;
		init_waitqueue_head(&subscriptions->wq);
		INIT_HLIST_HEAD(&subscriptions->deferred_list);
	}

	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_clean;

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either
	 * thanks to mm_take_all_locks().
	 *
	 * release semantics on the initialization of the
	 * mmu_notifier_subscriptions's contents are provided for unlocked
	 * readers.  acquire can only be used while holding the mmgrab or
	 * mmget, and is safe because once created the
	 * mmu_notifier_subscriptions is not freed until the mm is destroyed.
	 * As above, users holding the mmap_lock or one of the
	 * mm_take_all_locks() do not need to use acquire semantics.
	 */
	if (subscriptions)
		smp_store_release(&mm->notifier_subscriptions, subscriptions);

	if (subscription) {
		/* Pairs with the mmdrop in mmu_notifier_unregister_* */
		mmgrab(mm);
		subscription->mm = mm;
		subscription->users = 1;

		spin_lock(&mm->notifier_subscriptions->lock);
		hlist_add_head_rcu(&subscription->hlist,
				   &mm->notifier_subscriptions->list);
		spin_unlock(&mm->notifier_subscriptions->lock);
	} else
		mm->notifier_subscriptions->has_itree = true;

	mm_drop_all_locks(mm);
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return 0;

out_clean:
	kfree(subscriptions);
	return ret;
}
EXPORT_SYMBOL_GPL(__mmu_notifier_register);

/**
 * mmu_notifier_register - Register a notifier on a mm
 * @subscription: The notifier to attach
 * @mm: The mm to attach the notifier to
 *
 * Must not hold mmap_lock nor any other VM related lock when calling
 * this registration function. Must also ensure mm_users can't go down
 * to zero while this runs to avoid races with mmu_notifier_release,
 * so mm has to be current->mm or the mm should be pinned safely such
 * as with get_task_mm(). If the mm is not current->mm, the mm_users
 * pin should be released by calling mmput after mmu_notifier_register
 * returns.
 *
 * mmu_notifier_unregister() or mmu_notifier_put() must always be called to
 * unregister the notifier.
 *
 * While the caller holds a mmu_notifier get, the subscription->mm pointer
 * will remain valid, and can be converted to an active mm pointer via
 * mmget_not_zero().
 */
int mmu_notifier_register(struct mmu_notifier *subscription,
			  struct mm_struct *mm)
{
	int ret;

	mmap_write_lock(mm);
	ret = __mmu_notifier_register(subscription, mm);
	mmap_write_unlock(mm);
	return ret;
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);
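
/*
 * A minimal registration sketch, assuming a driver-defined ops table. Only
 * mmu_notifier_register()/mmu_notifier_unregister() and the ->release
 * signature are the real interfaces; my_release and my_subscription are
 * hypothetical:
 *
 *	static void my_release(struct mmu_notifier *subscription,
 *			       struct mm_struct *mm)
 *	{
 *		// flush all sptes; no new sptes may be established after this
 *	}
 *
 *	static const struct mmu_notifier_ops my_ops = {
 *		.release = my_release,
 *	};
 *
 *	static struct mmu_notifier my_subscription = { .ops = &my_ops };
 *
 *	// from a context holding an mm_users pin (e.g. current->mm):
 *	ret = mmu_notifier_register(&my_subscription, current->mm);
 *	...
 *	mmu_notifier_unregister(&my_subscription, current->mm);
 */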

static struct mmu_notifier *
find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
{
	struct mmu_notifier *subscription;

	spin_lock(&mm->notifier_subscriptions->lock);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 lockdep_is_held(&mm->notifier_subscriptions->lock)) {
		if (subscription->ops != ops)
			continue;

		if (likely(subscription->users != UINT_MAX))
			subscription->users++;
		else
			subscription = ERR_PTR(-EOVERFLOW);
		spin_unlock(&mm->notifier_subscriptions->lock);
		return subscription;
	}
	spin_unlock(&mm->notifier_subscriptions->lock);
	return NULL;
}

/**
 * mmu_notifier_get_locked - Return the single struct mmu_notifier for
 *                           the mm & ops
 * @ops: The operations struct being subscribed with
 * @mm: The mm to attach notifiers to
 *
 * This function either allocates a new mmu_notifier via
 * ops->alloc_notifier(), or returns an already existing notifier on the
 * list. The value of the ops pointer is used to determine when two notifiers
 * are the same.
 *
 * Each call to mmu_notifier_get() must be paired with a call to
 * mmu_notifier_put(). The caller must hold the write side of mm->mmap_lock.
 *
 * While the caller has a mmu_notifier get the mm pointer will remain valid,
 * and can be converted to an active mm pointer via mmget_not_zero().
 */
struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
					     struct mm_struct *mm)
{
	struct mmu_notifier *subscription;
	int ret;

	mmap_assert_write_locked(mm);

	if (mm->notifier_subscriptions) {
		subscription = find_get_mmu_notifier(mm, ops);
		if (subscription)
			return subscription;
	}

	subscription = ops->alloc_notifier(mm);
	if (IS_ERR(subscription))
		return subscription;
	subscription->ops = ops;
	ret = __mmu_notifier_register(subscription, mm);
	if (ret)
		goto out_free;
	return subscription;
out_free:
	subscription->ops->free_notifier(subscription);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);

/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
{
	BUG_ON(!hlist_empty(&mm->notifier_subscriptions->list));
	kfree(mm->notifier_subscriptions);
	mm->notifier_subscriptions = LIST_POISON1; /* debug */
}

/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with SRCU and against mmu_notifier_unregister
 * with the unregister lock + SRCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returned we're guaranteed
 * that ->release or any other method can't run anymore.
 */
void mmu_notifier_unregister(struct mmu_notifier *subscription,
			     struct mm_struct *mm)
{
	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	if (!hlist_unhashed(&subscription->hlist)) {
		/*
		 * SRCU here will force exit_mmap to wait for ->release to
		 * finish before freeing the pages.
		 */
		int id;

		id = srcu_read_lock(&srcu);
		/*
		 * exit_mmap will block in mmu_notifier_release to guarantee
		 * that ->release is called before freeing the pages.
		 */
		if (subscription->ops->release)
			subscription->ops->release(subscription, mm);
		srcu_read_unlock(&srcu, id);

		spin_lock(&mm->notifier_subscriptions->lock);
		/*
		 * Can not use list_del_rcu() since __mmu_notifier_release
		 * can delete it before we hold the lock.
		 */
		hlist_del_init_rcu(&subscription->hlist);
		spin_unlock(&mm->notifier_subscriptions->lock);
	}

	/*
	 * Wait for any running method to finish, of course including
	 * ->release if it was run by mmu_notifier_release instead of us.
	 */
	synchronize_srcu(&srcu);

	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);

static void mmu_notifier_free_rcu(struct rcu_head *rcu)
{
	struct mmu_notifier *subscription =
		container_of(rcu, struct mmu_notifier, rcu);
	struct mm_struct *mm = subscription->mm;

	subscription->ops->free_notifier(subscription);
	/* Pairs with the get in __mmu_notifier_register() */
	mmdrop(mm);
}

/**
 * mmu_notifier_put - Release the reference on the notifier
 * @subscription: The notifier to act on
 *
 * This function must be paired with each mmu_notifier_get(), it releases the
 * reference obtained by the get. If this is the last reference then the
 * process to free the notifier will be run asynchronously.
 *
 * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
 * when the mm_struct is destroyed. Instead free_notifier is always called to
 * release any resources held by the user.
 *
 * As ops->release is not guaranteed to be called, the user must ensure that
 * all sptes are dropped, and no new sptes can be established before
 * mmu_notifier_put() is called.
 *
 * This function can be called from the ops->release callback, however the
 * caller must still ensure it is called pairwise with mmu_notifier_get().
 *
 * Modules calling this function must call mmu_notifier_synchronize() in
 * their __exit functions to ensure the async work is completed.
 */
void mmu_notifier_put(struct mmu_notifier *subscription)
{
	struct mm_struct *mm = subscription->mm;

	spin_lock(&mm->notifier_subscriptions->lock);
	if (WARN_ON(!subscription->users) || --subscription->users)
		goto out_unlock;
	hlist_del_init_rcu(&subscription->hlist);
	spin_unlock(&mm->notifier_subscriptions->lock);

	call_srcu(&srcu, &subscription->rcu, mmu_notifier_free_rcu);
	return;

out_unlock:
	spin_unlock(&mm->notifier_subscriptions->lock);
}
EXPORT_SYMBOL_GPL(mmu_notifier_put);

static int __mmu_interval_notifier_insert(
	struct mmu_interval_notifier *interval_sub, struct mm_struct *mm,
	struct mmu_notifier_subscriptions *subscriptions, unsigned long start,
	unsigned long length, const struct mmu_interval_notifier_ops *ops)
{
	interval_sub->mm = mm;
	interval_sub->ops = ops;
	RB_CLEAR_NODE(&interval_sub->interval_tree.rb);
	interval_sub->interval_tree.start = start;
	/*
	 * Note that the representation of the intervals in the interval tree
	 * considers the ending point as contained in the interval.
	 */
	if (length == 0 ||
	    check_add_overflow(start, length - 1,
			       &interval_sub->interval_tree.last))
		return -EOVERFLOW;

	/* Must call with a mmget() held */
	if (WARN_ON(atomic_read(&mm->mm_users) <= 0))
		return -EINVAL;

	/* pairs with mmdrop in mmu_interval_notifier_remove() */
	mmgrab(mm);

	/*
	 * If some invalidate_range_start/end region is going on in parallel
	 * we don't know what VA ranges are affected, so we must assume this
	 * new range is included.
	 *
	 * If the itree is invalidating then we are not allowed to change
	 * it. Retrying until invalidation is done is tricky due to the
	 * possibility for live lock, instead defer the add to
	 * mn_itree_inv_end() so this algorithm is deterministic.
	 *
	 * In all cases the value for the interval_sub->invalidate_seq should be
	 * odd, see mmu_interval_read_begin()
	 */
	spin_lock(&subscriptions->lock);
	if (subscriptions->active_invalidate_ranges) {
		if (mn_itree_is_invalidating(subscriptions))
			hlist_add_head(&interval_sub->deferred_item,
				       &subscriptions->deferred_list);
		else {
			subscriptions->invalidate_seq |= 1;
			interval_tree_insert(&interval_sub->interval_tree,
					     &subscriptions->itree);
		}
		interval_sub->invalidate_seq = subscriptions->invalidate_seq;
	} else {
		WARN_ON(mn_itree_is_invalidating(subscriptions));
		/*
		 * The starting seq for a subscription not under invalidation
		 * should be odd, not equal to the current invalidate_seq and
		 * invalidate_seq should not 'wrap' to the new seq any time
		 * soon.
		 */
		interval_sub->invalidate_seq =
			subscriptions->invalidate_seq - 1;
		interval_tree_insert(&interval_sub->interval_tree,
				     &subscriptions->itree);
	}
	spin_unlock(&subscriptions->lock);
	return 0;
}

/**
 * mmu_interval_notifier_insert - Insert an interval notifier
 * @interval_sub: Interval subscription to register
 * @start: Starting virtual address to monitor
 * @length: Length of the range to monitor
 * @mm: mm_struct to attach to
 * @ops: Interval notifier operations to be called on matching events
 *
 * This function subscribes the interval notifier for notifications from the
 * mm.  Upon return the ops related to mmu_interval_notifier will be called
 * whenever an event that intersects with the given range occurs.
 *
 * Upon return the range_notifier may not be present in the interval tree yet.
 * The caller must use the normal interval notifier read flow via
 * mmu_interval_read_begin() to establish SPTEs for this range.
 */
int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
				 struct mm_struct *mm, unsigned long start,
				 unsigned long length,
				 const struct mmu_interval_notifier_ops *ops)
{
	struct mmu_notifier_subscriptions *subscriptions;
	int ret;

	might_lock(&mm->mmap_lock);

	subscriptions = smp_load_acquire(&mm->notifier_subscriptions);
	if (!subscriptions || !subscriptions->has_itree) {
		ret = mmu_notifier_register(NULL, mm);
		if (ret)
			return ret;
		subscriptions = mm->notifier_subscriptions;
	}
	return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions,
					      start, length, ops);
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert);

int mmu_interval_notifier_insert_locked(
	struct mmu_interval_notifier *interval_sub, struct mm_struct *mm,
	unsigned long start, unsigned long length,
	const struct mmu_interval_notifier_ops *ops)
{
	struct mmu_notifier_subscriptions *subscriptions =
		mm->notifier_subscriptions;
	int ret;

	mmap_assert_write_locked(mm);

	if (!subscriptions || !subscriptions->has_itree) {
		ret = __mmu_notifier_register(NULL, mm);
		if (ret)
			return ret;
		subscriptions = mm->notifier_subscriptions;
	}
	return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions,
					      start, length, ops);
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);

/**
 * mmu_interval_notifier_remove - Remove an interval notifier
 * @interval_sub: Interval subscription to unregister
 *
 * This function must be paired with mmu_interval_notifier_insert(). It cannot
 * be called from any ops callback.
 *
 * Once this returns ops callbacks are no longer running on other CPUs and
 * will not be called in future.
 */
void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
{
	struct mm_struct *mm = interval_sub->mm;
	struct mmu_notifier_subscriptions *subscriptions =
		mm->notifier_subscriptions;
	unsigned long seq = 0;

	might_sleep();

	spin_lock(&subscriptions->lock);
	if (mn_itree_is_invalidating(subscriptions)) {
		/*
		 * remove is being called after insert put this on the
		 * deferred list, but before the deferred list was processed.
		 */
		if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) {
			hlist_del(&interval_sub->deferred_item);
		} else {
			hlist_add_head(&interval_sub->deferred_item,
				       &subscriptions->deferred_list);
			seq = subscriptions->invalidate_seq;
		}
	} else {
		WARN_ON(RB_EMPTY_NODE(&interval_sub->interval_tree.rb));
		interval_tree_remove(&interval_sub->interval_tree,
				     &subscriptions->itree);
	}
	spin_unlock(&subscriptions->lock);

	/*
	 * The possible sleep on progress in the invalidation requires the
	 * caller not hold any locks held by invalidation callbacks.
	 */
	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
	if (seq)
		wait_event(subscriptions->wq,
			   READ_ONCE(subscriptions->invalidate_seq) != seq);

	/* pairs with mmgrab in mmu_interval_notifier_insert() */
	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_remove);

/**
 * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
 *
 * This function ensures that all outstanding async SRCU work from
 * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
 * associated with an unused mmu_notifier will no longer be called.
 *
 * Before using, the caller must ensure that all of its mmu_notifiers have
 * been fully released via mmu_notifier_put().
 *
 * Modules using the mmu_notifier_put() API should call this in their __exit
 * function to avoid module unloading races.
 */
void mmu_notifier_synchronize(void)
{
	synchronize_srcu(&srcu);
}
EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);

bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)
{
	if (!range->vma || range->event != MMU_NOTIFY_PROTECTION_VMA)
		return false;
	/* Return true if the vma still has the read flag set. */
	return range->vma->vm_flags & VM_READ;
}
EXPORT_SYMBOL_GPL(mmu_notifier_range_update_to_read_only);