Commit 699140ba838dd3fa2c5cce474e14f194b09f91aa

Authored by Tejun Heo
1 parent 3a5a6d0c2b

cpuset: drop async_rebuild_sched_domains()

In general, we want to make cgroup_mutex one of the outermost locks
and be able to use get_online_cpus() and friends from cgroup methods.
With cpuset hotplug made async, get_online_cpus() can now be nested
inside cgroup_mutex.

Currently, cpuset avoids nesting get_online_cpus() inside cgroup_mutex
by bouncing sched_domain rebuilding to a work item.  Since such nesting
is now allowed, remove the workqueue bouncing code and always rebuild
sched_domains synchronously.  This also nests sched_domains_mutex
inside cgroup_mutex, which is intended and should be okay.
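
To make the new ordering concrete, here is a minimal user-space sketch
of the rule this commit adopts, with pthread mutexes standing in for
cgroup_mutex and the CPU-hotplug lock (all names below are illustrative,
not kernel API): every path takes the outer lock before the inner one,
so the nesting that the old workqueue bounce existed to avoid becomes
simply the required lock order.

    /* Illustration only: a consistent outer-then-inner ordering is what
     * removes the ABBA deadlock that the workqueue bounce in
     * async_rebuild_sched_domains() used to dodge. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER; /* cgroup_mutex */
    static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER; /* hotplug lock */

    static void rebuild(void)
    {
            pthread_mutex_lock(&inner);     /* like get_online_cpus() */
            puts("rebuild sched domains");
            pthread_mutex_unlock(&inner);   /* like put_online_cpus() */
    }

    static void cpuset_write(void)
    {
            pthread_mutex_lock(&outer);     /* like cgroup_lock() */
            rebuild();                      /* inner nests inside outer */
            pthread_mutex_unlock(&outer);
    }

    int main(void)
    {
            cpuset_write();
            return 0;
    }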

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>

Showing 1 changed file with 16 additions and 60 deletions

... ... @@ -61,14 +61,6 @@
61 61 #include <linux/cgroup.h>
62 62  
63 63 /*
64   - * Workqueue for cpuset related tasks.
65   - *
66   - * Using kevent workqueue may cause deadlock when memory_migrate
67   - * is set. So we create a separate workqueue thread for cpuset.
68   - */
69   -static struct workqueue_struct *cpuset_wq;
70   -
71   -/*
72 64 * Tracks how many cpusets are currently defined in system.
73 65 * When there is only one cpuset (the root cpuset) we can
74 66 * short circuit some hooks.
... ... @@ -753,25 +745,25 @@
753 745 /*
754 746 * Rebuild scheduler domains.
755 747 *
756   - * Call with neither cgroup_mutex held nor within get_online_cpus().
757   - * Takes both cgroup_mutex and get_online_cpus().
  748 + * If the flag 'sched_load_balance' of any cpuset with non-empty
  749 + * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
  750 + * which has that flag enabled, or if any cpuset with a non-empty
  751 + * 'cpus' is removed, then call this routine to rebuild the
  752 + * scheduler's dynamic sched domains.
758 753 *
759   - * Cannot be directly called from cpuset code handling changes
760   - * to the cpuset pseudo-filesystem, because it cannot be called
761   - * from code that already holds cgroup_mutex.
  754 + * Call with cgroup_mutex held. Takes get_online_cpus().
762 755 */
763   -static void do_rebuild_sched_domains(struct work_struct *unused)
  756 +static void rebuild_sched_domains_locked(void)
764 757 {
765 758 struct sched_domain_attr *attr;
766 759 cpumask_var_t *doms;
767 760 int ndoms;
768 761  
  762 + WARN_ON_ONCE(!cgroup_lock_is_held());
769 763 get_online_cpus();
770 764  
771 765 /* Generate domain masks and attrs */
772   - cgroup_lock();
773 766 ndoms = generate_sched_domains(&doms, &attr);
774   - cgroup_unlock();
775 767  
776 768 /* Have scheduler rebuild the domains */
777 769 partition_sched_domains(ndoms, doms, attr);
... ... @@ -779,7 +771,7 @@
779 771 put_online_cpus();
780 772 }
781 773 #else /* !CONFIG_SMP */
782   -static void do_rebuild_sched_domains(struct work_struct *unused)
  774 +static void rebuild_sched_domains_locked(void)
783 775 {
784 776 }
785 777  
... ... @@ -791,44 +783,11 @@
791 783 }
792 784 #endif /* CONFIG_SMP */
793 785  
794   -static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
795   -
796   -/*
797   - * Rebuild scheduler domains, asynchronously via workqueue.
798   - *
799   - * If the flag 'sched_load_balance' of any cpuset with non-empty
800   - * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
801   - * which has that flag enabled, or if any cpuset with a non-empty
802   - * 'cpus' is removed, then call this routine to rebuild the
803   - * scheduler's dynamic sched domains.
804   - *
805   - * The rebuild_sched_domains() and partition_sched_domains()
806   - * routines must nest cgroup_lock() inside get_online_cpus(),
807   - * but such cpuset changes as these must nest that locking the
808   - * other way, holding cgroup_lock() for much of the code.
809   - *
810   - * So in order to avoid an ABBA deadlock, the cpuset code handling
811   - * these user changes delegates the actual sched domain rebuilding
812   - * to a separate workqueue thread, which ends up processing the
813   - * above do_rebuild_sched_domains() function.
814   - */
815   -static void async_rebuild_sched_domains(void)
816   -{
817   - queue_work(cpuset_wq, &rebuild_sched_domains_work);
818   -}
819   -
820   -/*
821   - * Accomplishes the same scheduler domain rebuild as the above
822   - * async_rebuild_sched_domains(), however it directly calls the
823   - * rebuild routine synchronously rather than calling it via an
824   - * asynchronous work thread.
825   - *
826   - * This can only be called from code that is not holding
827   - * cgroup_mutex (not nested in a cgroup_lock() call.)
828   - */
829 786 void rebuild_sched_domains(void)
830 787 {
831   - do_rebuild_sched_domains(NULL);
  788 + cgroup_lock();
  789 + rebuild_sched_domains_locked();
  790 + cgroup_unlock();
832 791 }
833 792  
834 793 /**
... ... @@ -948,7 +907,7 @@
948 907 heap_free(&heap);
949 908  
950 909 if (is_load_balanced)
951   - async_rebuild_sched_domains();
  910 + rebuild_sched_domains_locked();
952 911 return 0;
953 912 }
954 913  
... ... @@ -1196,7 +1155,7 @@
1196 1155 cs->relax_domain_level = val;
1197 1156 if (!cpumask_empty(cs->cpus_allowed) &&
1198 1157 is_sched_load_balance(cs))
1199   - async_rebuild_sched_domains();
  1158 + rebuild_sched_domains_locked();
1200 1159 }
1201 1160  
1202 1161 return 0;
... ... @@ -1288,7 +1247,7 @@
1288 1247 mutex_unlock(&callback_mutex);
1289 1248  
1290 1249 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
1291   - async_rebuild_sched_domains();
  1250 + rebuild_sched_domains_locked();
1292 1251  
1293 1252 if (spread_flag_changed)
1294 1253 update_tasks_flags(cs, &heap);
... ... @@ -1925,7 +1884,7 @@
1925 1884 /*
1926 1885 * If the cpuset being removed has its flag 'sched_load_balance'
1927 1886 * enabled, then simulate turning sched_load_balance off, which
1928   - * will call async_rebuild_sched_domains().
  1887 + * will call rebuild_sched_domains_locked().
1929 1888 */
1930 1889  
1931 1890 static void cpuset_css_free(struct cgroup *cont)
... ... @@ -2237,9 +2196,6 @@
2237 2196 top_cpuset.mems_allowed = node_states[N_MEMORY];
2238 2197  
2239 2198 hotplug_memory_notifier(cpuset_track_online_nodes, 10);
2240   -
2241   - cpuset_wq = create_singlethread_workqueue("cpuset");
2242   - BUG_ON(!cpuset_wq);
2243 2199 }
2244 2200  
2245 2201 /**
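
For reference, the shape the diff converges on is the common
foo()/foo_locked() split: the _locked variant asserts that the caller
already holds the outer lock, and a thin wrapper takes the lock for
external callers.  A hedged user-space rendition of that pattern (the
held-flag helper and all names are ours, not kernel API):

    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER; /* cgroup_mutex */
    static int big_lock_held;   /* toy stand-in for cgroup_lock_is_held() */

    /* Like rebuild_sched_domains_locked(): caller must hold big_lock. */
    static void rebuild_locked(void)
    {
            assert(big_lock_held);  /* analogue of the WARN_ON_ONCE() check */
            puts("generate + partition sched domains");
    }

    /* Like the new rebuild_sched_domains(): wrapper for outside callers. */
    static void rebuild(void)
    {
            pthread_mutex_lock(&big_lock);
            big_lock_held = 1;
            rebuild_locked();
            big_lock_held = 0;
            pthread_mutex_unlock(&big_lock);
    }

    int main(void)
    {
            rebuild();
            return 0;
    }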