Commit cf2a473c4089aa41c26f653200673f5a4cc25047
Committed by Linus Torvalds
1 parent b4b2641843
[PATCH] cpuset: combine refresh_mems and update_mems
The important code paths through alloc_pages_current() and alloc_page_vma(),
by which most kernel page allocations go, both called
cpuset_update_current_mems_allowed(), which in turn called refresh_mems().
Both of these latter two routines took task_lock(), got the task's cpuset
pointer, and checked for an out-of-date cpuset->mems_generation.

That was a silly duplication of code and a waste of CPU cycles on an
important code path.

Those two routines are now consolidated into a single routine,
cpuset_update_task_memory_state(), so named because it updates more than
just mems_allowed.  All callers of either routine were changed to call the
new consolidated routine.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
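To make the hot-path optimization easier to follow before reading the diff, here is a minimal user-space sketch of the generation-check pattern the consolidated routine relies on. This is not the kernel code: the toy_* names, the pthread mutex, and the bitmask type are invented stand-ins for task_struct, cpuset, task_lock() and callback_sem, and the real routine additionally rebinds the task NUMA mempolicy and may migrate pages. Compile with gcc -pthread.

/*
 * Illustrative sketch only -- not the kernel code below.  Hot allocation
 * paths compare a per-task cached generation number against the cpuset's
 * current one, and only do the locked refresh when placement changed.
 */
#include <pthread.h>
#include <stdio.h>

struct toy_cpuset {
	pthread_mutex_t lock;		/* stand-in for callback_sem/task_lock */
	int mems_generation;		/* bumped whenever mems_allowed changes */
	unsigned long mems_allowed;	/* allowed memory nodes, as a bitmask */
};

struct toy_task {
	struct toy_cpuset *cpuset;
	int cpuset_mems_generation;	/* generation last seen by this task */
	unsigned long mems_allowed;	/* task-local copy used by allocators */
};

/* The one routine both allocation paths would call, mirroring the patch. */
static void toy_update_task_memory_state(struct toy_task *tsk)
{
	struct toy_cpuset *cs = tsk->cpuset;
	int gen;

	if (!cs)			/* early boot / exiting task case */
		return;

	pthread_mutex_lock(&cs->lock);
	gen = cs->mems_generation;
	pthread_mutex_unlock(&cs->lock);

	if (gen == tsk->cpuset_mems_generation)
		return;			/* common case: nothing changed */

	pthread_mutex_lock(&cs->lock);
	tsk->mems_allowed = cs->mems_allowed;	/* refresh the local copy */
	tsk->cpuset_mems_generation = cs->mems_generation;
	pthread_mutex_unlock(&cs->lock);
}

int main(void)
{
	struct toy_cpuset cs = { .mems_generation = 1, .mems_allowed = 0x3 };
	struct toy_task tsk = { .cpuset = &cs };

	pthread_mutex_init(&cs.lock, NULL);
	toy_update_task_memory_state(&tsk);	/* first call copies 0x3 */
	cs.mems_allowed = 0xf;			/* placement changes ... */
	cs.mems_generation++;			/* ... and generation is bumped */
	toy_update_task_memory_state(&tsk);	/* stale generation: refresh */
	printf("task mems_allowed = %#lx\n", tsk.mems_allowed);
	return 0;
}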
Showing 3 changed files with 48 additions and 61 deletions
include/linux/cpuset.h
@@ -20,7 +20,7 @@
 extern void cpuset_exit(struct task_struct *p);
 extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p);
 void cpuset_init_current_mems_allowed(void);
-void cpuset_update_current_mems_allowed(void);
+void cpuset_update_task_memory_state(void);
 #define cpuset_nodes_subset_current_mems_allowed(nodes) \
 	nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
@@ -51,7 +51,7 @@
 }
 
 static inline void cpuset_init_current_mems_allowed(void) {}
-static inline void cpuset_update_current_mems_allowed(void) {}
+static inline void cpuset_update_task_memory_state(void) {}
 #define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
 
 static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
kernel/cpuset.c
@@ -584,14 +584,27 @@
 	BUG_ON(!nodes_intersects(*pmask, node_online_map));
 }
 
-/*
- * Refresh current tasks mems_allowed and mems_generation from current
- * tasks cpuset.
+/**
+ * cpuset_update_task_memory_state - update task memory placement
  *
- * Call without callback_sem or task_lock() held. May be called with
- * or without manage_sem held. Will acquire task_lock() and might
- * acquire callback_sem during call.
+ * If the current tasks cpusets mems_allowed changed behind our
+ * backs, update current->mems_allowed, mems_generation and task NUMA
+ * mempolicy to the new value.
  *
+ * Task mempolicy is updated by rebinding it relative to the
+ * current->cpuset if a task has its memory placement changed.
+ * Do not call this routine if in_interrupt().
+ *
+ * Call without callback_sem or task_lock() held. May be called
+ * with or without manage_sem held. Except in early boot or
+ * an exiting task, when tsk->cpuset is NULL, this routine will
+ * acquire task_lock(). We don't need to use task_lock to guard
+ * against another task changing a non-NULL cpuset pointer to NULL,
+ * as that is only done by a task on itself, and if the current task
+ * is here, it is not simultaneously in the exit code NULL'ing its
+ * cpuset pointer. This routine also might acquire callback_sem and
+ * current->mm->mmap_sem during call.
+ *
  * The task_lock() is required to dereference current->cpuset safely.
  * Without it, we could pick up the pointer value of current->cpuset
  * in one instruction, and then attach_task could give us a different
@@ -605,32 +618,36 @@
  * task has been modifying its cpuset.
  */
 
-static void refresh_mems(void)
+void cpuset_update_task_memory_state()
 {
 	int my_cpusets_mem_gen;
+	struct task_struct *tsk = current;
+	struct cpuset *cs = tsk->cpuset;
 
-	task_lock(current);
-	my_cpusets_mem_gen = current->cpuset->mems_generation;
-	task_unlock(current);
+	if (unlikely(!cs))
+		return;
 
-	if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
-		struct cpuset *cs;
-		nodemask_t oldmem = current->mems_allowed;
+	task_lock(tsk);
+	my_cpusets_mem_gen = cs->mems_generation;
+	task_unlock(tsk);
+
+	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
+		nodemask_t oldmem = tsk->mems_allowed;
 		int migrate;
 
 		down(&callback_sem);
-		task_lock(current);
-		cs = current->cpuset;
+		task_lock(tsk);
+		cs = tsk->cpuset;	/* Maybe changed when task not locked */
 		migrate = is_memory_migrate(cs);
-		guarantee_online_mems(cs, &current->mems_allowed);
-		current->cpuset_mems_generation = cs->mems_generation;
-		task_unlock(current);
+		guarantee_online_mems(cs, &tsk->mems_allowed);
+		tsk->cpuset_mems_generation = cs->mems_generation;
+		task_unlock(tsk);
 		up(&callback_sem);
-		if (!nodes_equal(oldmem, current->mems_allowed)) {
-			numa_policy_rebind(&oldmem, &current->mems_allowed);
+		numa_policy_rebind(&oldmem, &tsk->mems_allowed);
+		if (!nodes_equal(oldmem, tsk->mems_allowed)) {
 			if (migrate) {
-				do_migrate_pages(current->mm, &oldmem,
-					&current->mems_allowed,
+				do_migrate_pages(tsk->mm, &oldmem,
+					&tsk->mems_allowed,
 					MPOL_MF_MOVE_ALL);
 			}
 		}
@@ -1630,7 +1647,7 @@
 		return -ENOMEM;
 
 	down(&manage_sem);
-	refresh_mems();
+	cpuset_update_task_memory_state();
 	cs->flags = 0;
 	if (notify_on_release(parent))
 		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1688,7 +1705,7 @@
 	/* the vfs holds both inode->i_sem already */
 
 	down(&manage_sem);
-	refresh_mems();
+	cpuset_update_task_memory_state();
 	if (atomic_read(&cs->count) > 0) {
 		up(&manage_sem);
 		return -EBUSY;
@@ -1870,36 +1887,6 @@
 void cpuset_init_current_mems_allowed(void)
 {
 	current->mems_allowed = NODE_MASK_ALL;
-}
-
-/**
- * cpuset_update_current_mems_allowed - update mems parameters to new values
- *
- * If the current tasks cpusets mems_allowed changed behind our backs,
- * update current->mems_allowed and mems_generation to the new value.
- * Do not call this routine if in_interrupt().
- *
- * Call without callback_sem or task_lock() held. May be called
- * with or without manage_sem held. Unless exiting, it will acquire
- * task_lock(). Also might acquire callback_sem during call to
- * refresh_mems().
- */
-
-void cpuset_update_current_mems_allowed(void)
-{
-	struct cpuset *cs;
-	int need_to_refresh = 0;
-
-	task_lock(current);
-	cs = current->cpuset;
-	if (!cs)
-		goto done;
-	if (current->cpuset_mems_generation != cs->mems_generation)
-		need_to_refresh = 1;
-done:
-	task_unlock(current);
-	if (need_to_refresh)
-		refresh_mems();
 }
 
 /**
mm/mempolicy.c
@@ -387,7 +387,7 @@
 	if (!nodes)
 		return 0;
 
-	cpuset_update_current_mems_allowed();
+	cpuset_update_task_memory_state();
 	if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
 		return -EINVAL;
 	return mpol_check_policy(mode, nodes);
@@ -461,7 +461,7 @@
 	struct vm_area_struct *vma = NULL;
 	struct mempolicy *pol = current->mempolicy;
 
-	cpuset_update_current_mems_allowed();
+	cpuset_update_task_memory_state();
 	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
 		return -EINVAL;
 	if (flags & MPOL_F_ADDR) {
@@ -1089,7 +1089,7 @@
 {
 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
-	cpuset_update_current_mems_allowed();
+	cpuset_update_task_memory_state();
 
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
@@ -1115,7 +1115,7 @@
  * interrupt context and apply the current process NUMA policy.
  * Returns NULL when no page can be allocated.
  *
- * Don't call cpuset_update_current_mems_allowed() unless
+ * Don't call cpuset_update_task_memory_state() unless
  * 1) it's ok to take cpuset_sem (can WAIT), and
  * 2) allocating for current task (not interrupt).
  */
@@ -1124,7 +1124,7 @@
 	struct mempolicy *pol = current->mempolicy;
 
 	if ((gfp & __GFP_WAIT) && !in_interrupt())
-		cpuset_update_current_mems_allowed();
+		cpuset_update_task_memory_state();
 	if (!pol || in_interrupt())
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)