Commit 42288fe366c4f1ce7522bc9f27d0bc2a81c55264
Committed by
Linus Torvalds
1 parent
5439ca6b8f
Exists in
master
and in
20 other branches
mm: mempolicy: Convert shared_policy mutex to spinlock
Sasha was fuzzing with trinity and reported the following problem: BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main 2 locks held by trinity-main/6361: #0: (&mm->mmap_sem){++++++}, at: [<ffffffff810aa314>] __do_page_fault+0x1e4/0x4f0 #1: (&(&mm->page_table_lock)->rlock){+.+...}, at: [<ffffffff8122f017>] handle_pte_fault+0x3f7/0x6a0 Pid: 6361, comm: trinity-main Tainted: G W 3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74 Call Trace: __might_sleep+0x1c3/0x1e0 mutex_lock_nested+0x29/0x50 mpol_shared_policy_lookup+0x2e/0x90 shmem_get_policy+0x2e/0x30 get_vma_policy+0x5a/0xa0 mpol_misplaced+0x41/0x1d0 handle_pte_fault+0x465/0x6a0 This was triggered by a different version of automatic NUMA balancing but in theory the current version is vulnerable to the same problem. do_numa_page -> numa_migrate_prep -> mpol_misplaced -> get_vma_policy -> shmem_get_policy It's very unlikely this will happen as shared pages are not marked pte_numa -- see the page_mapcount() check in change_pte_range() -- but it is possible. To address this, this patch restores sp->lock as originally implemented by Kosaki Motohiro. In the path where get_vma_policy() is called, it should not be calling sp_alloc() so it is not necessary to treat the PTL specially. Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Mel Gorman <mgorman@suse.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 2 changed files with 49 additions and 21 deletions Side-by-side Diff
include/linux/mempolicy.h
mm/mempolicy.c
... | ... | @@ -2132,7 +2132,7 @@ |
2132 | 2132 | */ |
2133 | 2133 | |
2134 | 2134 | /* lookup first element intersecting start-end */ |
2135 | -/* Caller holds sp->mutex */ | |
2135 | +/* Caller holds sp->lock */ | |
2136 | 2136 | static struct sp_node * |
2137 | 2137 | sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) |
2138 | 2138 | { |
2139 | 2139 | |
... | ... | @@ -2196,13 +2196,13 @@ |
2196 | 2196 | |
2197 | 2197 | if (!sp->root.rb_node) |
2198 | 2198 | return NULL; |
2199 | - mutex_lock(&sp->mutex); | |
2199 | + spin_lock(&sp->lock); | |
2200 | 2200 | sn = sp_lookup(sp, idx, idx+1); |
2201 | 2201 | if (sn) { |
2202 | 2202 | mpol_get(sn->policy); |
2203 | 2203 | pol = sn->policy; |
2204 | 2204 | } |
2205 | - mutex_unlock(&sp->mutex); | |
2205 | + spin_unlock(&sp->lock); | |
2206 | 2206 | return pol; |
2207 | 2207 | } |
2208 | 2208 | |
... | ... | @@ -2328,6 +2328,14 @@ |
2328 | 2328 | sp_free(n); |
2329 | 2329 | } |
2330 | 2330 | |
2331 | +static void sp_node_init(struct sp_node *node, unsigned long start, | |
2332 | + unsigned long end, struct mempolicy *pol) | |
2333 | +{ | |
2334 | + node->start = start; | |
2335 | + node->end = end; | |
2336 | + node->policy = pol; | |
2337 | +} | |
2338 | + | |
2331 | 2339 | static struct sp_node *sp_alloc(unsigned long start, unsigned long end, |
2332 | 2340 | struct mempolicy *pol) |
2333 | 2341 | { |
2334 | 2342 | |
... | ... | @@ -2344,11 +2352,8 @@ |
2344 | 2352 | return NULL; |
2345 | 2353 | } |
2346 | 2354 | newpol->flags |= MPOL_F_SHARED; |
2355 | + sp_node_init(n, start, end, newpol); | |
2347 | 2356 | |
2348 | - n->start = start; | |
2349 | - n->end = end; | |
2350 | - n->policy = newpol; | |
2351 | - | |
2352 | 2357 | return n; |
2353 | 2358 | } |
2354 | 2359 | |
2355 | 2360 | |
... | ... | @@ -2357,9 +2362,12 @@ |
2357 | 2362 | unsigned long end, struct sp_node *new) |
2358 | 2363 | { |
2359 | 2364 | struct sp_node *n; |
2365 | + struct sp_node *n_new = NULL; | |
2366 | + struct mempolicy *mpol_new = NULL; | |
2360 | 2367 | int ret = 0; |
2361 | 2368 | |
2362 | - mutex_lock(&sp->mutex); | |
2369 | +restart: | |
2370 | + spin_lock(&sp->lock); | |
2363 | 2371 | n = sp_lookup(sp, start, end); |
2364 | 2372 | /* Take care of old policies in the same range. */ |
2365 | 2373 | while (n && n->start < end) { |
2366 | 2374 | |
... | ... | @@ -2372,14 +2380,16 @@ |
2372 | 2380 | } else { |
2373 | 2381 | /* Old policy spanning whole new range. */ |
2374 | 2382 | if (n->end > end) { |
2375 | - struct sp_node *new2; | |
2376 | - new2 = sp_alloc(end, n->end, n->policy); | |
2377 | - if (!new2) { | |
2378 | - ret = -ENOMEM; | |
2379 | - goto out; | |
2380 | - } | |
2383 | + if (!n_new) | |
2384 | + goto alloc_new; | |
2385 | + | |
2386 | + *mpol_new = *n->policy; | |
2387 | + atomic_set(&mpol_new->refcnt, 1); | |
2388 | + sp_node_init(n_new, end, n->end, mpol_new); | 
2389 | + sp_insert(sp, n_new); | |
2381 | 2390 | n->end = start; |
2382 | - sp_insert(sp, new2); | |
2391 | + n_new = NULL; | |
2392 | + mpol_new = NULL; | |
2383 | 2393 | break; |
2384 | 2394 | } else |
2385 | 2395 | n->end = start; |
2386 | 2396 | |
... | ... | @@ -2390,9 +2400,27 @@ |
2390 | 2400 | } |
2391 | 2401 | if (new) |
2392 | 2402 | sp_insert(sp, new); |
2393 | -out: | |
2394 | - mutex_unlock(&sp->mutex); | |
2403 | + spin_unlock(&sp->lock); | |
2404 | + ret = 0; | |
2405 | + | |
2406 | +err_out: | |
2407 | + if (mpol_new) | |
2408 | + mpol_put(mpol_new); | |
2409 | + if (n_new) | |
2410 | + kmem_cache_free(sn_cache, n_new); | |
2411 | + | |
2395 | 2412 | return ret; |
2413 | + | |
2414 | +alloc_new: | |
2415 | + spin_unlock(&sp->lock); | |
2416 | + ret = -ENOMEM; | |
2417 | + n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL); | |
2418 | + if (!n_new) | |
2419 | + goto err_out; | |
2420 | + mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL); | |
2421 | + if (!mpol_new) | |
2422 | + goto err_out; | |
2423 | + goto restart; | |
2396 | 2424 | } |
2397 | 2425 | |
2398 | 2426 | /** |
... | ... | @@ -2410,7 +2438,7 @@ |
2410 | 2438 | int ret; |
2411 | 2439 | |
2412 | 2440 | sp->root = RB_ROOT; /* empty tree == default mempolicy */ |
2413 | - mutex_init(&sp->mutex); | |
2441 | + spin_lock_init(&sp->lock); | |
2414 | 2442 | |
2415 | 2443 | if (mpol) { |
2416 | 2444 | struct vm_area_struct pvma; |
2417 | 2445 | |
... | ... | @@ -2476,14 +2504,14 @@ |
2476 | 2504 | |
2477 | 2505 | if (!p->root.rb_node) |
2478 | 2506 | return; |
2479 | - mutex_lock(&p->mutex); | |
2507 | + spin_lock(&p->lock); | |
2480 | 2508 | next = rb_first(&p->root); |
2481 | 2509 | while (next) { |
2482 | 2510 | n = rb_entry(next, struct sp_node, nd); |
2483 | 2511 | next = rb_next(&n->nd); |
2484 | 2512 | sp_delete(p, n); |
2485 | 2513 | } |
2486 | - mutex_unlock(&p->mutex); | |
2514 | + spin_unlock(&p->lock); | |
2487 | 2515 | } |
2488 | 2516 | |
2489 | 2517 | #ifdef CONFIG_NUMA_BALANCING |