Commit cf2a473c4089aa41c26f653200673f5a4cc25047

Authored by Paul Jackson
Committed by Linus Torvalds
1 parent b4b2641843

[PATCH] cpuset: combine refresh_mems and update_mems

The important code paths through alloc_pages_current() and alloc_page_vma(),
which handle most kernel page allocations, both called
cpuset_update_current_mems_allowed(), which in turn called refresh_mems().
-Both- of these latter two routines took task_lock(), got the task's cpuset
pointer, and checked for an out-of-date cpuset->mems_generation.

That was a silly duplication of code and a waste of CPU cycles on an important
code path.

Consolidated those two routines into a single routine, called
cpuset_update_task_memory_state(), since it updates more than just
mems_allowed.

Changed all callers of either routine to call the new consolidated routine.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
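
As an illustration of the pattern being consolidated, here is a minimal
userspace C sketch (not the kernel code; the type and function names below are
made up for the example): the fast path is a single compare of a per-task
cached generation against the cpuset's generation, and the expensive refresh
only runs when they differ.

#include <stdio.h>

struct cpuset_like {
	int mems_generation;		/* bumped whenever memory placement changes */
};

struct task_like {
	struct cpuset_like *cpuset;	/* the task's cpuset; may be NULL early in boot or at exit */
	int cached_mems_generation;	/* last generation this task refreshed against */
};

static void update_task_memory_state(struct task_like *tsk)
{
	if (!tsk->cpuset)
		return;		/* nothing to do without a cpuset */
	if (tsk->cached_mems_generation == tsk->cpuset->mems_generation)
		return;		/* fast path: placement unchanged since last check */
	/* slow path: in the kernel this re-reads mems_allowed under locks,
	 * rebinds the NUMA mempolicy and possibly migrates pages */
	tsk->cached_mems_generation = tsk->cpuset->mems_generation;
	printf("refreshed to generation %d\n", tsk->cached_mems_generation);
}

int main(void)
{
	struct cpuset_like cs = { .mems_generation = 1 };
	struct task_like tsk = { .cpuset = &cs, .cached_mems_generation = 0 };

	update_task_memory_state(&tsk);	/* refreshes: cached 0 != 1 */
	update_task_memory_state(&tsk);	/* fast path: already current */
	cs.mems_generation++;		/* placement changed behind the task's back */
	update_task_memory_state(&tsk);	/* refreshes again */
	return 0;
}

Before this patch, both cpuset_update_current_mems_allowed() and refresh_mems()
performed the equivalent of that generation compare, each taking task_lock();
after it, the single consolidated routine does the compare once and only then
takes the locks needed for the refresh.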

Showing 3 changed files with 48 additions and 61 deletions

include/linux/cpuset.h
... ... @@ -20,7 +20,7 @@
20 20 extern void cpuset_exit(struct task_struct *p);
21 21 extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p);
22 22 void cpuset_init_current_mems_allowed(void);
23   -void cpuset_update_current_mems_allowed(void);
  23 +void cpuset_update_task_memory_state(void);
24 24 #define cpuset_nodes_subset_current_mems_allowed(nodes) \
25 25 nodes_subset((nodes), current->mems_allowed)
26 26 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
... ... @@ -51,7 +51,7 @@
51 51 }
52 52  
53 53 static inline void cpuset_init_current_mems_allowed(void) {}
54   -static inline void cpuset_update_current_mems_allowed(void) {}
  54 +static inline void cpuset_update_task_memory_state(void) {}
55 55 #define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
56 56  
57 57 static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
kernel/cpuset.c
... ... @@ -584,14 +584,27 @@
584 584 BUG_ON(!nodes_intersects(*pmask, node_online_map));
585 585 }
586 586  
587   -/*
588   - * Refresh current tasks mems_allowed and mems_generation from current
589   - * tasks cpuset.
  587 +/**
  588 + * cpuset_update_task_memory_state - update task memory placement
590 589 *
591   - * Call without callback_sem or task_lock() held. May be called with
592   - * or without manage_sem held. Will acquire task_lock() and might
593   - * acquire callback_sem during call.
  590 + * If the current tasks cpusets mems_allowed changed behind our
  591 + * backs, update current->mems_allowed, mems_generation and task NUMA
  592 + * mempolicy to the new value.
594 593 *
  594 + * Task mempolicy is updated by rebinding it relative to the
  595 + * current->cpuset if a task has its memory placement changed.
  596 + * Do not call this routine if in_interrupt().
  597 + *
  598 + * Call without callback_sem or task_lock() held. May be called
  599 + * with or without manage_sem held. Except in early boot or
  600 + * an exiting task, when tsk->cpuset is NULL, this routine will
  601 + * acquire task_lock(). We don't need to use task_lock to guard
  602 + * against another task changing a non-NULL cpuset pointer to NULL,
  603 + * as that is only done by a task on itself, and if the current task
  604 + * is here, it is not simultaneously in the exit code NULL'ing its
  605 + * cpuset pointer. This routine also might acquire callback_sem and
  606 + * current->mm->mmap_sem during call.
  607 + *
595 608 * The task_lock() is required to dereference current->cpuset safely.
596 609 * Without it, we could pick up the pointer value of current->cpuset
597 610 * in one instruction, and then attach_task could give us a different
... ... @@ -605,32 +618,36 @@
605 618 * task has been modifying its cpuset.
606 619 */
607 620  
608   -static void refresh_mems(void)
  621 +void cpuset_update_task_memory_state()
609 622 {
610 623 int my_cpusets_mem_gen;
  624 + struct task_struct *tsk = current;
  625 + struct cpuset *cs = tsk->cpuset;
611 626  
612   - task_lock(current);
613   - my_cpusets_mem_gen = current->cpuset->mems_generation;
614   - task_unlock(current);
  627 + if (unlikely(!cs))
  628 + return;
615 629  
616   - if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
617   - struct cpuset *cs;
618   - nodemask_t oldmem = current->mems_allowed;
  630 + task_lock(tsk);
  631 + my_cpusets_mem_gen = cs->mems_generation;
  632 + task_unlock(tsk);
  633 +
  634 + if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
  635 + nodemask_t oldmem = tsk->mems_allowed;
619 636 int migrate;
620 637  
621 638 down(&callback_sem);
622   - task_lock(current);
623   - cs = current->cpuset;
  639 + task_lock(tsk);
  640 + cs = tsk->cpuset; /* Maybe changed when task not locked */
624 641 migrate = is_memory_migrate(cs);
625   - guarantee_online_mems(cs, &current->mems_allowed);
626   - current->cpuset_mems_generation = cs->mems_generation;
627   - task_unlock(current);
  642 + guarantee_online_mems(cs, &tsk->mems_allowed);
  643 + tsk->cpuset_mems_generation = cs->mems_generation;
  644 + task_unlock(tsk);
628 645 up(&callback_sem);
629   - if (!nodes_equal(oldmem, current->mems_allowed)) {
630   - numa_policy_rebind(&oldmem, &current->mems_allowed);
  646 + numa_policy_rebind(&oldmem, &tsk->mems_allowed);
  647 + if (!nodes_equal(oldmem, tsk->mems_allowed)) {
631 648 if (migrate) {
632   - do_migrate_pages(current->mm, &oldmem,
633   - &current->mems_allowed,
  649 + do_migrate_pages(tsk->mm, &oldmem,
  650 + &tsk->mems_allowed,
634 651 MPOL_MF_MOVE_ALL);
635 652 }
636 653 }
... ... @@ -1630,7 +1647,7 @@
1630 1647 return -ENOMEM;
1631 1648  
1632 1649 down(&manage_sem);
1633   - refresh_mems();
  1650 + cpuset_update_task_memory_state();
1634 1651 cs->flags = 0;
1635 1652 if (notify_on_release(parent))
1636 1653 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
... ... @@ -1688,7 +1705,7 @@
1688 1705 /* the vfs holds both inode->i_sem already */
1689 1706  
1690 1707 down(&manage_sem);
1691   - refresh_mems();
  1708 + cpuset_update_task_memory_state();
1692 1709 if (atomic_read(&cs->count) > 0) {
1693 1710 up(&manage_sem);
1694 1711 return -EBUSY;
... ... @@ -1870,36 +1887,6 @@
1870 1887 void cpuset_init_current_mems_allowed(void)
1871 1888 {
1872 1889 current->mems_allowed = NODE_MASK_ALL;
1873   -}
1874   -
1875   -/**
1876   - * cpuset_update_current_mems_allowed - update mems parameters to new values
1877   - *
1878   - * If the current tasks cpusets mems_allowed changed behind our backs,
1879   - * update current->mems_allowed and mems_generation to the new value.
1880   - * Do not call this routine if in_interrupt().
1881   - *
1882   - * Call without callback_sem or task_lock() held. May be called
1883   - * with or without manage_sem held. Unless exiting, it will acquire
1884   - * task_lock(). Also might acquire callback_sem during call to
1885   - * refresh_mems().
1886   - */
1887   -
1888   -void cpuset_update_current_mems_allowed(void)
1889   -{
1890   - struct cpuset *cs;
1891   - int need_to_refresh = 0;
1892   -
1893   - task_lock(current);
1894   - cs = current->cpuset;
1895   - if (!cs)
1896   - goto done;
1897   - if (current->cpuset_mems_generation != cs->mems_generation)
1898   - need_to_refresh = 1;
1899   -done:
1900   - task_unlock(current);
1901   - if (need_to_refresh)
1902   - refresh_mems();
1903 1890 }
1904 1891  
1905 1892 /**
mm/mempolicy.c
... ... @@ -387,7 +387,7 @@
387 387 if (!nodes)
388 388 return 0;
389 389  
390   - cpuset_update_current_mems_allowed();
  390 + cpuset_update_task_memory_state();
391 391 if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
392 392 return -EINVAL;
393 393 return mpol_check_policy(mode, nodes);
... ... @@ -461,7 +461,7 @@
461 461 struct vm_area_struct *vma = NULL;
462 462 struct mempolicy *pol = current->mempolicy;
463 463  
464   - cpuset_update_current_mems_allowed();
  464 + cpuset_update_task_memory_state();
465 465 if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
466 466 return -EINVAL;
467 467 if (flags & MPOL_F_ADDR) {
... ... @@ -1089,7 +1089,7 @@
1089 1089 {
1090 1090 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1091 1091  
1092   - cpuset_update_current_mems_allowed();
  1092 + cpuset_update_task_memory_state();
1093 1093  
1094 1094 if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
1095 1095 unsigned nid;
... ... @@ -1115,7 +1115,7 @@
1115 1115 * interrupt context and apply the current process NUMA policy.
1116 1116 * Returns NULL when no page can be allocated.
1117 1117 *
1118   - * Don't call cpuset_update_current_mems_allowed() unless
  1118 + * Don't call cpuset_update_task_memory_state() unless
1119 1119 * 1) it's ok to take cpuset_sem (can WAIT), and
1120 1120 * 2) allocating for current task (not interrupt).
1121 1121 */
... ... @@ -1124,7 +1124,7 @@
1124 1124 struct mempolicy *pol = current->mempolicy;
1125 1125  
1126 1126 if ((gfp & __GFP_WAIT) && !in_interrupt())
1127   - cpuset_update_current_mems_allowed();
  1127 + cpuset_update_task_memory_state();
1128 1128 if (!pol || in_interrupt())
1129 1129 pol = &default_policy;
1130 1130 if (pol->policy == MPOL_INTERLEAVE)