Commit e4e364e865b382f9d99c7fc230ec2ce7df21257a
Committed by
Linus Torvalds
1 parent
2741a559a0
Exists in
master
and in
7 other branches
[PATCH] cpuset: memory migration interaction fix
Fix memory migration so that it works regardless of what cpuset the invoking task is in. If a task invoked a memory migration, by doing one of: 1) writing a different nodemask to a cpuset's 'mems' file, or 2) writing a task's pid to a different cpuset's 'tasks' file, where the cpuset had its 'memory_migrate' option turned on, then the allocation of the new pages for the migrated task(s) was constrained by the invoking task's cpuset. If this task wasn't in a cpuset that allowed the requested memory nodes, the memory migration would happen to some other nodes that were in that invoking task's cpuset. This was usually surprising and puzzling behaviour: Why didn't the pages move? Why did the pages move -there-? To fix this, temporarily change the invoking task's 'mems_allowed' task_struct field to the nodes the migrating task is moving to, so that new pages can be allocated there. Signed-off-by: Paul Jackson <pj@sgi.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Showing 1 changed file with 52 additions and 5 deletions Side-by-side Diff
kernel/cpuset.c
... | ... | @@ -834,6 +834,55 @@ |
834 | 834 | } |
835 | 835 | |
836 | 836 | /* |
837 | + * cpuset_migrate_mm | |
838 | + * | |
839 | + * Migrate memory region from one set of nodes to another. | |
840 | + * | |
841 | + * Temporarily set tasks mems_allowed to target nodes of migration, | |
842 | + * so that the migration code can allocate pages on these nodes. | |
843 | + * | |
844 | + * Call holding manage_mutex, so our current->cpuset won't change | |
845 | + * during this call, as manage_mutex holds off any attach_task() | |
846 | + * calls. Therefore we don't need to take task_lock around the | |
847 | + * call to guarantee_online_mems(), as we know no one is changing | |
848 | + * our tasks cpuset. | |
849 | + * | |
850 | + * Hold callback_mutex around the two modifications of our tasks | |
851 | + * mems_allowed to synchronize with cpuset_mems_allowed(). | |
852 | + * | |
853 | + * While the mm_struct we are migrating is typically from some | |
854 | + * other task, the task_struct mems_allowed that we are hacking | |
855 | + * is for our current task, which must allocate new pages for that | |
856 | + * migrating memory region. | |
857 | + * | |
858 | + * We call cpuset_update_task_memory_state() before hacking | |
859 | + * our tasks mems_allowed, so that we are assured of being in | |
860 | + * sync with our tasks cpuset, and in particular, callbacks to | |
861 | + * cpuset_update_task_memory_state() from nested page allocations | |
862 | + * won't see any mismatch of our cpuset and task mems_generation | |
863 | + * values, so won't overwrite our hacked tasks mems_allowed | |
864 | + * nodemask. | |
865 | + */ | |
866 | + | |
867 | +static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |
868 | + const nodemask_t *to) | |
869 | +{ | |
870 | + struct task_struct *tsk = current; | |
871 | + | |
872 | + cpuset_update_task_memory_state(); | |
873 | + | |
874 | + mutex_lock(&callback_mutex); | |
875 | + tsk->mems_allowed = *to; | |
876 | + mutex_unlock(&callback_mutex); | |
877 | + | |
878 | + do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); | |
879 | + | |
880 | + mutex_lock(&callback_mutex); | |
881 | + guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); | |
882 | + mutex_unlock(&callback_mutex); | |
883 | +} | |
884 | + | |
885 | +/* | |
837 | 886 | * Handle user request to change the 'mems' memory placement |
838 | 887 | * of a cpuset. Needs to validate the request, update the |
839 | 888 | * cpusets mems_allowed and mems_generation, and for each |
... | ... | @@ -945,10 +994,8 @@ |
945 | 994 | struct mm_struct *mm = mmarray[i]; |
946 | 995 | |
947 | 996 | mpol_rebind_mm(mm, &cs->mems_allowed); |
948 | - if (migrate) { | |
949 | - do_migrate_pages(mm, &oldmem, &cs->mems_allowed, | |
950 | - MPOL_MF_MOVE_ALL); | |
951 | - } | |
997 | + if (migrate) | |
998 | + cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); | |
952 | 999 | mmput(mm); |
953 | 1000 | } |
954 | 1001 | |
... | ... | @@ -1184,7 +1231,7 @@ |
1184 | 1231 | if (mm) { |
1185 | 1232 | mpol_rebind_mm(mm, &to); |
1186 | 1233 | if (is_memory_migrate(cs)) |
1187 | - do_migrate_pages(mm, &from, &to, MPOL_MF_MOVE_ALL); | |
1234 | + cpuset_migrate_mm(mm, &from, &to); | |
1188 | 1235 | mmput(mm); |
1189 | 1236 | } |
1190 | 1237 |