Commit 3d3f26a7baaa921a0e790b4c72d20f0de91a5d65
Committed by: Linus Torvalds
1 parent: 6362e4d4ed
Exists in: master and in 39 other branches
[PATCH] kernel/cpuset.c, mutex conversion
convert cpuset.c's callback_sem and manage_sem to mutexes.

Build and boot tested by Ingo.  Build, boot, unit and stress tested by pj.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
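The conversion is mechanical: each semaphore used as a sleeping lock becomes a struct mutex, and the declaration and lock/unlock calls map one-for-one. A minimal sketch of the mapping applied throughout the patch (the foo_* identifiers are placeholders, not names from cpuset.c):

    #include <asm/semaphore.h>        ->  #include <linux/mutex.h>
    static DECLARE_MUTEX(foo_sem);    ->  static DEFINE_MUTEX(foo_mutex);
    down(&foo_sem);                   ->  mutex_lock(&foo_mutex);
    up(&foo_sem);                     ->  mutex_unlock(&foo_mutex);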
Showing 1 changed file with 103 additions and 109 deletions (side-by-side diff)
kernel/cpuset.c
... | ... | @@ -53,7 +53,7 @@ |
53 | 53 | |
54 | 54 | #include <asm/uaccess.h> |
55 | 55 | #include <asm/atomic.h> |
56 | -#include <asm/semaphore.h> | |
56 | +#include <linux/mutex.h> | |
57 | 57 | |
58 | 58 | #define CPUSET_SUPER_MAGIC 0x27e0eb |
59 | 59 | |
... | ... | @@ -168,63 +168,57 @@ |
168 | 168 | static struct super_block *cpuset_sb; |
169 | 169 | |
170 | 170 | /* |
171 | - * We have two global cpuset semaphores below. They can nest. | |
172 | - * It is ok to first take manage_sem, then nest callback_sem. We also | |
171 | + * We have two global cpuset mutexes below. They can nest. | |
172 | + * It is ok to first take manage_mutex, then nest callback_mutex. We also | |
173 | 173 | * require taking task_lock() when dereferencing a tasks cpuset pointer. |
174 | 174 | * See "The task_lock() exception", at the end of this comment. |
175 | 175 | * |
176 | - * A task must hold both semaphores to modify cpusets. If a task | |
177 | - * holds manage_sem, then it blocks others wanting that semaphore, | |
178 | - * ensuring that it is the only task able to also acquire callback_sem | |
176 | + * A task must hold both mutexes to modify cpusets. If a task | |
177 | + * holds manage_mutex, then it blocks others wanting that mutex, | |
178 | + * ensuring that it is the only task able to also acquire callback_mutex | |
179 | 179 | * and be able to modify cpusets. It can perform various checks on |
180 | 180 | * the cpuset structure first, knowing nothing will change. It can |
181 | - * also allocate memory while just holding manage_sem. While it is | |
181 | + * also allocate memory while just holding manage_mutex. While it is | |
182 | 182 | * performing these checks, various callback routines can briefly |
183 | - * acquire callback_sem to query cpusets. Once it is ready to make | |
184 | - * the changes, it takes callback_sem, blocking everyone else. | |
183 | + * acquire callback_mutex to query cpusets. Once it is ready to make | |
184 | + * the changes, it takes callback_mutex, blocking everyone else. | |
185 | 185 | * |
186 | 186 | * Calls to the kernel memory allocator can not be made while holding |
187 | - * callback_sem, as that would risk double tripping on callback_sem | |
187 | + * callback_mutex, as that would risk double tripping on callback_mutex | |
188 | 188 | * from one of the callbacks into the cpuset code from within |
189 | 189 | * __alloc_pages(). |
190 | 190 | * |
191 | - * If a task is only holding callback_sem, then it has read-only | |
191 | + * If a task is only holding callback_mutex, then it has read-only | |
192 | 192 | * access to cpusets. |
193 | 193 | * |
194 | 194 | * The task_struct fields mems_allowed and mems_generation may only |
195 | 195 | * be accessed in the context of that task, so require no locks. |
196 | 196 | * |
197 | 197 | * Any task can increment and decrement the count field without lock. |
198 | - * So in general, code holding manage_sem or callback_sem can't rely | |
198 | + * So in general, code holding manage_mutex or callback_mutex can't rely | |
199 | 199 | * on the count field not changing. However, if the count goes to |
200 | - * zero, then only attach_task(), which holds both semaphores, can | |
200 | + * zero, then only attach_task(), which holds both mutexes, can | |
201 | 201 | * increment it again. Because a count of zero means that no tasks |
202 | 202 | * are currently attached, therefore there is no way a task attached |
203 | 203 | * to that cpuset can fork (the other way to increment the count). |
204 | - * So code holding manage_sem or callback_sem can safely assume that | |
204 | + * So code holding manage_mutex or callback_mutex can safely assume that | |
205 | 205 | * if the count is zero, it will stay zero. Similarly, if a task |
206 | - * holds manage_sem or callback_sem on a cpuset with zero count, it | |
206 | + * holds manage_mutex or callback_mutex on a cpuset with zero count, it | |
207 | 207 | * knows that the cpuset won't be removed, as cpuset_rmdir() needs |
208 | - * both of those semaphores. | |
208 | + * both of those mutexes. | |
209 | 209 | * |
210 | - * A possible optimization to improve parallelism would be to make | |
211 | - * callback_sem a R/W semaphore (rwsem), allowing the callback routines | |
212 | - * to proceed in parallel, with read access, until the holder of | |
213 | - * manage_sem needed to take this rwsem for exclusive write access | |
214 | - * and modify some cpusets. | |
215 | - * | |
216 | 210 | * The cpuset_common_file_write handler for operations that modify |
217 | - * the cpuset hierarchy holds manage_sem across the entire operation, | |
211 | + * the cpuset hierarchy holds manage_mutex across the entire operation, | |
218 | 212 | * single threading all such cpuset modifications across the system. |
219 | 213 | * |
220 | - * The cpuset_common_file_read() handlers only hold callback_sem across | |
214 | + * The cpuset_common_file_read() handlers only hold callback_mutex across | |
221 | 215 | * small pieces of code, such as when reading out possibly multi-word |
222 | 216 | * cpumasks and nodemasks. |
223 | 217 | * |
224 | 218 | * The fork and exit callbacks cpuset_fork() and cpuset_exit(), don't |
225 | - * (usually) take either semaphore. These are the two most performance | |
219 | + * (usually) take either mutex. These are the two most performance | |
226 | 220 | * critical pieces of code here. The exception occurs on cpuset_exit(), |
227 | - * when a task in a notify_on_release cpuset exits. Then manage_sem | |
221 | + * when a task in a notify_on_release cpuset exits. Then manage_mutex | |
228 | 222 | * is taken, and if the cpuset count is zero, a usermode call made |
229 | 223 | * to /sbin/cpuset_release_agent with the name of the cpuset (path |
230 | 224 | * relative to the root of cpuset file system) as the argument. |
231 | 225 | |
... | ... | @@ -242,9 +236,9 @@ |
242 | 236 | * |
243 | 237 | * The need for this exception arises from the action of attach_task(), |
244 | 238 | * which overwrites one tasks cpuset pointer with another. It does |
245 | - * so using both semaphores, however there are several performance | |
239 | + * so using both mutexes, however there are several performance | |
246 | 240 | * critical places that need to reference task->cpuset without the |
247 | - * expense of grabbing a system global semaphore. Therefore except as | |
241 | + * expense of grabbing a system global mutex. Therefore except as | |
248 | 242 | * noted below, when dereferencing or, as in attach_task(), modifying |
249 | 243 | * a tasks cpuset pointer we use task_lock(), which acts on a spinlock |
250 | 244 | * (task->alloc_lock) already in the task_struct routinely used for |
... | ... | @@ -256,8 +250,8 @@ |
256 | 250 | * the routine cpuset_update_task_memory_state(). |
257 | 251 | */ |
258 | 252 | |
259 | -static DECLARE_MUTEX(manage_sem); | |
260 | -static DECLARE_MUTEX(callback_sem); | |
253 | +static DEFINE_MUTEX(manage_mutex); | |
254 | +static DEFINE_MUTEX(callback_mutex); | |
261 | 255 | |
262 | 256 | /* |
263 | 257 | * A couple of forward declarations required, due to cyclic reference loop: |
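The comment above fixes the locking discipline that the rest of the diff follows: manage_mutex is the outer, long-held lock for modifiers, callback_mutex the inner, briefly-held lock that readers and callbacks take. A minimal sketch of a cpuset-modifying path under that discipline (example_modify_cpuset is a hypothetical helper, not part of this patch):

    static int example_modify_cpuset(struct cpuset *cs)
    {
            int err = -ENODEV;

            mutex_lock(&manage_mutex);          /* outer lock: single-threads all modifiers */
            if (is_removed(cs))
                    goto out;
            /* validation and kmalloc() are fine here -- only manage_mutex is held */
            mutex_lock(&callback_mutex);        /* inner lock: blocks readers only briefly */
            /* ... update cs->cpus_allowed, cs->mems_allowed or cs->flags ... */
            mutex_unlock(&callback_mutex);
            err = 0;
    out:
            mutex_unlock(&manage_mutex);
            return err;
    }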
... | ... | @@ -432,7 +426,7 @@ |
432 | 426 | } |
433 | 427 | |
434 | 428 | /* |
435 | - * Call with manage_sem held. Writes path of cpuset into buf. | |
429 | + * Call with manage_mutex held. Writes path of cpuset into buf. | |
436 | 430 | * Returns 0 on success, -errno on error. |
437 | 431 | */ |
438 | 432 | |
439 | 433 | |
... | ... | @@ -484,11 +478,11 @@ |
484 | 478 | * status of the /sbin/cpuset_release_agent task, so no sense holding |
485 | 479 | * our caller up for that. |
486 | 480 | * |
487 | - * When we had only one cpuset semaphore, we had to call this | |
481 | + * When we had only one cpuset mutex, we had to call this | |
488 | 482 | * without holding it, to avoid deadlock when call_usermodehelper() |
489 | 483 | * allocated memory. With two locks, we could now call this while |
490 | - * holding manage_sem, but we still don't, so as to minimize | |
491 | - * the time manage_sem is held. | |
484 | + * holding manage_mutex, but we still don't, so as to minimize | |
485 | + * the time manage_mutex is held. | |
492 | 486 | */ |
493 | 487 | |
494 | 488 | static void cpuset_release_agent(const char *pathbuf) |
495 | 489 | |
... | ... | @@ -520,15 +514,15 @@ |
520 | 514 | * cs is notify_on_release() and now both the user count is zero and |
521 | 515 | * the list of children is empty, prepare cpuset path in a kmalloc'd |
522 | 516 | * buffer, to be returned via ppathbuf, so that the caller can invoke |
523 | - * cpuset_release_agent() with it later on, once manage_sem is dropped. | |
524 | - * Call here with manage_sem held. | |
517 | + * cpuset_release_agent() with it later on, once manage_mutex is dropped. | |
518 | + * Call here with manage_mutex held. | |
525 | 519 | * |
526 | 520 | * This check_for_release() routine is responsible for kmalloc'ing |
527 | 521 | * pathbuf. The above cpuset_release_agent() is responsible for |
528 | 522 | * kfree'ing pathbuf. The caller of these routines is responsible |
529 | 523 | * for providing a pathbuf pointer, initialized to NULL, then |
530 | - * calling check_for_release() with manage_sem held and the address | |
531 | - * of the pathbuf pointer, then dropping manage_sem, then calling | |
524 | + * calling check_for_release() with manage_mutex held and the address | |
525 | + * of the pathbuf pointer, then dropping manage_mutex, then calling | |
532 | 526 | * cpuset_release_agent() with pathbuf, as set by check_for_release(). |
533 | 527 | */ |
534 | 528 | |
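The pathbuf hand-off spelled out above appears essentially verbatim in cpuset_rmdir() and cpuset_exit() later in this diff; pulled out as a standalone sketch (the caller name is hypothetical):

    static void example_drop_cpuset_ref(struct cpuset *cs)
    {
            char *pathbuf = NULL;

            mutex_lock(&manage_mutex);
            if (atomic_dec_and_test(&cs->count))
                    check_for_release(cs, &pathbuf);    /* may kmalloc() the path */
            mutex_unlock(&manage_mutex);
            cpuset_release_agent(pathbuf);              /* runs without manage_mutex, kfree()s pathbuf */
    }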
... | ... | @@ -559,7 +553,7 @@ |
559 | 553 | * One way or another, we guarantee to return some non-empty subset |
560 | 554 | * of cpu_online_map. |
561 | 555 | * |
562 | - * Call with callback_sem held. | |
556 | + * Call with callback_mutex held. | |
563 | 557 | */ |
564 | 558 | |
565 | 559 | static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) |
... | ... | @@ -583,7 +577,7 @@ |
583 | 577 | * One way or another, we guarantee to return some non-empty subset |
584 | 578 | * of node_online_map. |
585 | 579 | * |
586 | - * Call with callback_sem held. | |
580 | + * Call with callback_mutex held. | |
587 | 581 | */ |
588 | 582 | |
589 | 583 | static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) |
590 | 584 | |
... | ... | @@ -608,12 +602,12 @@ |
608 | 602 | * current->cpuset if a task has its memory placement changed. |
609 | 603 | * Do not call this routine if in_interrupt(). |
610 | 604 | * |
611 | - * Call without callback_sem or task_lock() held. May be called | |
612 | - * with or without manage_sem held. Doesn't need task_lock to guard | |
605 | + * Call without callback_mutex or task_lock() held. May be called | |
606 | + * with or without manage_mutex held. Doesn't need task_lock to guard | |
613 | 607 | * against another task changing a non-NULL cpuset pointer to NULL, |
614 | 608 | * as that is only done by a task on itself, and if the current task |
615 | 609 | * is here, it is not simultaneously in the exit code NULL'ing its |
616 | - * cpuset pointer. This routine also might acquire callback_sem and | |
610 | + * cpuset pointer. This routine also might acquire callback_mutex and | |
617 | 611 | * current->mm->mmap_sem during call. |
618 | 612 | * |
619 | 613 | * Reading current->cpuset->mems_generation doesn't need task_lock |
620 | 614 | |
... | ... | @@ -658,13 +652,13 @@ |
658 | 652 | } |
659 | 653 | |
660 | 654 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { |
661 | - down(&callback_sem); | |
655 | + mutex_lock(&callback_mutex); | |
662 | 656 | task_lock(tsk); |
663 | 657 | cs = tsk->cpuset; /* Maybe changed when task not locked */ |
664 | 658 | guarantee_online_mems(cs, &tsk->mems_allowed); |
665 | 659 | tsk->cpuset_mems_generation = cs->mems_generation; |
666 | 660 | task_unlock(tsk); |
667 | - up(&callback_sem); | |
661 | + mutex_unlock(&callback_mutex); | |
668 | 662 | mpol_rebind_task(tsk, &tsk->mems_allowed); |
669 | 663 | } |
670 | 664 | } |
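As a rough illustration of the calling convention described above (the caller below is hypothetical, not a function from this patch): a slow path about to consult current->mems_allowed refreshes it first, which may briefly take callback_mutex and task_lock() as the hunk above shows.

    static struct page *example_constrained_alloc(gfp_t gfp_mask, unsigned int order)
    {
            cpuset_update_task_memory_state();  /* may take callback_mutex + task_lock() */
            /* current->mems_allowed and cpuset_mems_generation are now up to date */
            return alloc_pages(gfp_mask, order);
    }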
... | ... | @@ -674,7 +668,7 @@ |
674 | 668 | * |
675 | 669 | * One cpuset is a subset of another if all its allowed CPUs and |
676 | 670 | * Memory Nodes are a subset of the other, and its exclusive flags |
677 | - * are only set if the other's are set. Call holding manage_sem. | |
671 | + * are only set if the other's are set. Call holding manage_mutex. | |
678 | 672 | */ |
679 | 673 | |
680 | 674 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) |
... | ... | @@ -692,7 +686,7 @@ |
692 | 686 | * If we replaced the flag and mask values of the current cpuset |
693 | 687 | * (cur) with those values in the trial cpuset (trial), would |
694 | 688 | * our various subset and exclusive rules still be valid? Presumes |
695 | - * manage_sem held. | |
689 | + * manage_mutex held. | |
696 | 690 | * |
697 | 691 | * 'cur' is the address of an actual, in-use cpuset. Operations |
698 | 692 | * such as list traversal that depend on the actual address of the |
... | ... | @@ -746,7 +740,7 @@ |
746 | 740 | * exclusive child cpusets |
747 | 741 | * Build these two partitions by calling partition_sched_domains |
748 | 742 | * |
749 | - * Call with manage_sem held. May nest a call to the | |
743 | + * Call with manage_mutex held. May nest a call to the | |
750 | 744 | * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. |
751 | 745 | */ |
752 | 746 | |
... | ... | @@ -792,7 +786,7 @@ |
792 | 786 | } |
793 | 787 | |
794 | 788 | /* |
795 | - * Call with manage_sem held. May take callback_sem during call. | |
789 | + * Call with manage_mutex held. May take callback_mutex during call. | |
796 | 790 | */ |
797 | 791 | |
798 | 792 | static int update_cpumask(struct cpuset *cs, char *buf) |
799 | 793 | |
... | ... | @@ -811,9 +805,9 @@ |
811 | 805 | if (retval < 0) |
812 | 806 | return retval; |
813 | 807 | cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed); |
814 | - down(&callback_sem); | |
808 | + mutex_lock(&callback_mutex); | |
815 | 809 | cs->cpus_allowed = trialcs.cpus_allowed; |
816 | - up(&callback_sem); | |
810 | + mutex_unlock(&callback_mutex); | |
817 | 811 | if (is_cpu_exclusive(cs) && !cpus_unchanged) |
818 | 812 | update_cpu_domains(cs); |
819 | 813 | return 0; |
... | ... | @@ -827,7 +821,7 @@ |
827 | 821 | * the cpuset is marked 'memory_migrate', migrate the tasks |
828 | 822 | * pages to the new memory. |
829 | 823 | * |
830 | - * Call with manage_sem held. May take callback_sem during call. | |
824 | + * Call with manage_mutex held. May take callback_mutex during call. | |
831 | 825 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, |
832 | 826 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind |
833 | 827 | * their mempolicies to the cpusets new mems_allowed. |
834 | 828 | |
... | ... | @@ -862,11 +856,11 @@ |
862 | 856 | if (retval < 0) |
863 | 857 | goto done; |
864 | 858 | |
865 | - down(&callback_sem); | |
859 | + mutex_lock(&callback_mutex); | |
866 | 860 | cs->mems_allowed = trialcs.mems_allowed; |
867 | 861 | atomic_inc(&cpuset_mems_generation); |
868 | 862 | cs->mems_generation = atomic_read(&cpuset_mems_generation); |
869 | - up(&callback_sem); | |
863 | + mutex_unlock(&callback_mutex); | |
870 | 864 | |
871 | 865 | set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ |
872 | 866 | |
... | ... | @@ -922,7 +916,7 @@ |
922 | 916 | * tasklist_lock. Forks can happen again now - the mpol_copy() |
923 | 917 | * cpuset_being_rebound check will catch such forks, and rebind |
924 | 918 | * their vma mempolicies too. Because we still hold the global |
925 | - * cpuset manage_sem, we know that no other rebind effort will | |
919 | + * cpuset manage_mutex, we know that no other rebind effort will | |
926 | 920 | * be contending for the global variable cpuset_being_rebound. |
927 | 921 | * It's ok if we rebind the same mm twice; mpol_rebind_mm() |
928 | 922 | * is idempotent. Also migrate pages in each mm to new nodes. |
... | ... | @@ -948,7 +942,7 @@ |
948 | 942 | } |
949 | 943 | |
950 | 944 | /* |
951 | - * Call with manage_sem held. | |
945 | + * Call with manage_mutex held. | |
952 | 946 | */ |
953 | 947 | |
954 | 948 | static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) |
... | ... | @@ -967,7 +961,7 @@ |
967 | 961 | * cs: the cpuset to update |
968 | 962 | * buf: the buffer where we read the 0 or 1 |
969 | 963 | * |
970 | - * Call with manage_sem held. | |
964 | + * Call with manage_mutex held. | |
971 | 965 | */ |
972 | 966 | |
973 | 967 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) |
974 | 968 | |
... | ... | @@ -989,12 +983,12 @@ |
989 | 983 | return err; |
990 | 984 | cpu_exclusive_changed = |
991 | 985 | (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); |
992 | - down(&callback_sem); | |
986 | + mutex_lock(&callback_mutex); | |
993 | 987 | if (turning_on) |
994 | 988 | set_bit(bit, &cs->flags); |
995 | 989 | else |
996 | 990 | clear_bit(bit, &cs->flags); |
997 | - up(&callback_sem); | |
991 | + mutex_unlock(&callback_mutex); | |
998 | 992 | |
999 | 993 | if (cpu_exclusive_changed) |
1000 | 994 | update_cpu_domains(cs); |
... | ... | @@ -1104,7 +1098,7 @@ |
1104 | 1098 | * writing the path of the old cpuset in 'ppathbuf' if it needs to be |
1105 | 1099 | * notified on release. |
1106 | 1100 | * |
1107 | - * Call holding manage_sem. May take callback_sem and task_lock of | |
1101 | + * Call holding manage_mutex. May take callback_mutex and task_lock of | |
1108 | 1102 | * the task 'pid' during call. |
1109 | 1103 | */ |
1110 | 1104 | |
1111 | 1105 | |
... | ... | @@ -1144,13 +1138,13 @@ |
1144 | 1138 | get_task_struct(tsk); |
1145 | 1139 | } |
1146 | 1140 | |
1147 | - down(&callback_sem); | |
1141 | + mutex_lock(&callback_mutex); | |
1148 | 1142 | |
1149 | 1143 | task_lock(tsk); |
1150 | 1144 | oldcs = tsk->cpuset; |
1151 | 1145 | if (!oldcs) { |
1152 | 1146 | task_unlock(tsk); |
1153 | - up(&callback_sem); | |
1147 | + mutex_unlock(&callback_mutex); | |
1154 | 1148 | put_task_struct(tsk); |
1155 | 1149 | return -ESRCH; |
1156 | 1150 | } |
... | ... | @@ -1164,7 +1158,7 @@ |
1164 | 1158 | from = oldcs->mems_allowed; |
1165 | 1159 | to = cs->mems_allowed; |
1166 | 1160 | |
1167 | - up(&callback_sem); | |
1161 | + mutex_unlock(&callback_mutex); | |
1168 | 1162 | |
1169 | 1163 | mm = get_task_mm(tsk); |
1170 | 1164 | if (mm) { |
... | ... | @@ -1221,7 +1215,7 @@ |
1221 | 1215 | } |
1222 | 1216 | buffer[nbytes] = 0; /* nul-terminate */ |
1223 | 1217 | |
1224 | - down(&manage_sem); | |
1218 | + mutex_lock(&manage_mutex); | |
1225 | 1219 | |
1226 | 1220 | if (is_removed(cs)) { |
1227 | 1221 | retval = -ENODEV; |
... | ... | @@ -1264,7 +1258,7 @@ |
1264 | 1258 | if (retval == 0) |
1265 | 1259 | retval = nbytes; |
1266 | 1260 | out2: |
1267 | - up(&manage_sem); | |
1261 | + mutex_unlock(&manage_mutex); | |
1268 | 1262 | cpuset_release_agent(pathbuf); |
1269 | 1263 | out1: |
1270 | 1264 | kfree(buffer); |
1271 | 1265 | |
... | ... | @@ -1304,9 +1298,9 @@ |
1304 | 1298 | { |
1305 | 1299 | cpumask_t mask; |
1306 | 1300 | |
1307 | - down(&callback_sem); | |
1301 | + mutex_lock(&callback_mutex); | |
1308 | 1302 | mask = cs->cpus_allowed; |
1309 | - up(&callback_sem); | |
1303 | + mutex_unlock(&callback_mutex); | |
1310 | 1304 | |
1311 | 1305 | return cpulist_scnprintf(page, PAGE_SIZE, mask); |
1312 | 1306 | } |
1313 | 1307 | |
... | ... | @@ -1315,9 +1309,9 @@ |
1315 | 1309 | { |
1316 | 1310 | nodemask_t mask; |
1317 | 1311 | |
1318 | - down(&callback_sem); | |
1312 | + mutex_lock(&callback_mutex); | |
1319 | 1313 | mask = cs->mems_allowed; |
1320 | - up(&callback_sem); | |
1314 | + mutex_unlock(&callback_mutex); | |
1321 | 1315 | |
1322 | 1316 | return nodelist_scnprintf(page, PAGE_SIZE, mask); |
1323 | 1317 | } |
... | ... | @@ -1598,7 +1592,7 @@ |
1598 | 1592 | * Handle an open on 'tasks' file. Prepare a buffer listing the |
1599 | 1593 | * process id's of tasks currently attached to the cpuset being opened. |
1600 | 1594 | * |
1601 | - * Does not require any specific cpuset semaphores, and does not take any. | |
1595 | + * Does not require any specific cpuset mutexes, and does not take any. | |
1602 | 1596 | */ |
1603 | 1597 | static int cpuset_tasks_open(struct inode *unused, struct file *file) |
1604 | 1598 | { |
... | ... | @@ -1754,7 +1748,7 @@ |
1754 | 1748 | * name: name of the new cpuset. Will be strcpy'ed. |
1755 | 1749 | * mode: mode to set on new inode |
1756 | 1750 | * |
1757 | - * Must be called with the semaphore on the parent inode held | |
1751 | + * Must be called with the mutex on the parent inode held | |
1758 | 1752 | */ |
1759 | 1753 | |
1760 | 1754 | static long cpuset_create(struct cpuset *parent, const char *name, int mode) |
... | ... | @@ -1766,7 +1760,7 @@ |
1766 | 1760 | if (!cs) |
1767 | 1761 | return -ENOMEM; |
1768 | 1762 | |
1769 | - down(&manage_sem); | |
1763 | + mutex_lock(&manage_mutex); | |
1770 | 1764 | cpuset_update_task_memory_state(); |
1771 | 1765 | cs->flags = 0; |
1772 | 1766 | if (notify_on_release(parent)) |
... | ... | @@ -1782,28 +1776,28 @@ |
1782 | 1776 | |
1783 | 1777 | cs->parent = parent; |
1784 | 1778 | |
1785 | - down(&callback_sem); | |
1779 | + mutex_lock(&callback_mutex); | |
1786 | 1780 | list_add(&cs->sibling, &cs->parent->children); |
1787 | 1781 | number_of_cpusets++; |
1788 | - up(&callback_sem); | |
1782 | + mutex_unlock(&callback_mutex); | |
1789 | 1783 | |
1790 | 1784 | err = cpuset_create_dir(cs, name, mode); |
1791 | 1785 | if (err < 0) |
1792 | 1786 | goto err; |
1793 | 1787 | |
1794 | 1788 | /* |
1795 | - * Release manage_sem before cpuset_populate_dir() because it | |
1789 | + * Release manage_mutex before cpuset_populate_dir() because it | |
1796 | 1790 | * will down() this new directory's i_mutex and if we race with |
1797 | 1791 | * another mkdir, we might deadlock. |
1798 | 1792 | */ |
1799 | - up(&manage_sem); | |
1793 | + mutex_unlock(&manage_mutex); | |
1800 | 1794 | |
1801 | 1795 | err = cpuset_populate_dir(cs->dentry); |
1802 | 1796 | /* If err < 0, we have a half-filled directory - oh well ;) */ |
1803 | 1797 | return 0; |
1804 | 1798 | err: |
1805 | 1799 | list_del(&cs->sibling); |
1806 | - up(&manage_sem); | |
1800 | + mutex_unlock(&manage_mutex); | |
1807 | 1801 | kfree(cs); |
1808 | 1802 | return err; |
1809 | 1803 | } |
... | ... | @@ -1825,18 +1819,18 @@ |
1825 | 1819 | |
1826 | 1820 | /* the vfs holds both inode->i_mutex already */ |
1827 | 1821 | |
1828 | - down(&manage_sem); | |
1822 | + mutex_lock(&manage_mutex); | |
1829 | 1823 | cpuset_update_task_memory_state(); |
1830 | 1824 | if (atomic_read(&cs->count) > 0) { |
1831 | - up(&manage_sem); | |
1825 | + mutex_unlock(&manage_mutex); | |
1832 | 1826 | return -EBUSY; |
1833 | 1827 | } |
1834 | 1828 | if (!list_empty(&cs->children)) { |
1835 | - up(&manage_sem); | |
1829 | + mutex_unlock(&manage_mutex); | |
1836 | 1830 | return -EBUSY; |
1837 | 1831 | } |
1838 | 1832 | parent = cs->parent; |
1839 | - down(&callback_sem); | |
1833 | + mutex_lock(&callback_mutex); | |
1840 | 1834 | set_bit(CS_REMOVED, &cs->flags); |
1841 | 1835 | if (is_cpu_exclusive(cs)) |
1842 | 1836 | update_cpu_domains(cs); |
1843 | 1837 | |
... | ... | @@ -1848,10 +1842,10 @@ |
1848 | 1842 | cpuset_d_remove_dir(d); |
1849 | 1843 | dput(d); |
1850 | 1844 | number_of_cpusets--; |
1851 | - up(&callback_sem); | |
1845 | + mutex_unlock(&callback_mutex); | |
1852 | 1846 | if (list_empty(&parent->children)) |
1853 | 1847 | check_for_release(parent, &pathbuf); |
1854 | - up(&manage_sem); | |
1848 | + mutex_unlock(&manage_mutex); | |
1855 | 1849 | cpuset_release_agent(pathbuf); |
1856 | 1850 | return 0; |
1857 | 1851 | } |
... | ... | @@ -1960,19 +1954,19 @@ |
1960 | 1954 | * Description: Detach cpuset from @tsk and release it. |
1961 | 1955 | * |
1962 | 1956 | * Note that cpusets marked notify_on_release force every task in |
1963 | - * them to take the global manage_sem semaphore when exiting. | |
1957 | + * them to take the global manage_mutex mutex when exiting. | |
1964 | 1958 | * This could impact scaling on very large systems. Be reluctant to |
1965 | 1959 | * use notify_on_release cpusets where very high task exit scaling |
1966 | 1960 | * is required on large systems. |
1967 | 1961 | * |
1968 | 1962 | * Don't even think about derefencing 'cs' after the cpuset use count |
1969 | - * goes to zero, except inside a critical section guarded by manage_sem | |
1970 | - * or callback_sem. Otherwise a zero cpuset use count is a license to | |
1963 | + * goes to zero, except inside a critical section guarded by manage_mutex | |
1964 | + * or callback_mutex. Otherwise a zero cpuset use count is a license to | |
1971 | 1965 | * any other task to nuke the cpuset immediately, via cpuset_rmdir(). |
1972 | 1966 | * |
1973 | - * This routine has to take manage_sem, not callback_sem, because | |
1974 | - * it is holding that semaphore while calling check_for_release(), | |
1975 | - * which calls kmalloc(), so can't be called holding callback__sem(). | |
1967 | + * This routine has to take manage_mutex, not callback_mutex, because | |
1968 | + * it is holding that mutex while calling check_for_release(), | |
1969 | + * which calls kmalloc(), so can't be called holding callback_mutex(). | |
1976 | 1970 | * |
1977 | 1971 | * We don't need to task_lock() this reference to tsk->cpuset, |
1978 | 1972 | * because tsk is already marked PF_EXITING, so attach_task() won't |
1979 | 1973 | |
... | ... | @@ -2022,10 +2016,10 @@ |
2022 | 2016 | if (notify_on_release(cs)) { |
2023 | 2017 | char *pathbuf = NULL; |
2024 | 2018 | |
2025 | - down(&manage_sem); | |
2019 | + mutex_lock(&manage_mutex); | |
2026 | 2020 | if (atomic_dec_and_test(&cs->count)) |
2027 | 2021 | check_for_release(cs, &pathbuf); |
2028 | - up(&manage_sem); | |
2022 | + mutex_unlock(&manage_mutex); | |
2029 | 2023 | cpuset_release_agent(pathbuf); |
2030 | 2024 | } else { |
2031 | 2025 | atomic_dec(&cs->count); |
2032 | 2026 | |
... | ... | @@ -2046,11 +2040,11 @@ |
2046 | 2040 | { |
2047 | 2041 | cpumask_t mask; |
2048 | 2042 | |
2049 | - down(&callback_sem); | |
2043 | + mutex_lock(&callback_mutex); | |
2050 | 2044 | task_lock(tsk); |
2051 | 2045 | guarantee_online_cpus(tsk->cpuset, &mask); |
2052 | 2046 | task_unlock(tsk); |
2053 | - up(&callback_sem); | |
2047 | + mutex_unlock(&callback_mutex); | |
2054 | 2048 | |
2055 | 2049 | return mask; |
2056 | 2050 | } |
2057 | 2051 | |
... | ... | @@ -2074,11 +2068,11 @@ |
2074 | 2068 | { |
2075 | 2069 | nodemask_t mask; |
2076 | 2070 | |
2077 | - down(&callback_sem); | |
2071 | + mutex_lock(&callback_mutex); | |
2078 | 2072 | task_lock(tsk); |
2079 | 2073 | guarantee_online_mems(tsk->cpuset, &mask); |
2080 | 2074 | task_unlock(tsk); |
2081 | - up(&callback_sem); | |
2075 | + mutex_unlock(&callback_mutex); | |
2082 | 2076 | |
2083 | 2077 | return mask; |
2084 | 2078 | } |
... | ... | @@ -2104,7 +2098,7 @@ |
2104 | 2098 | |
2105 | 2099 | /* |
2106 | 2100 | * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive |
2107 | - * ancestor to the specified cpuset. Call holding callback_sem. | |
2101 | + * ancestor to the specified cpuset. Call holding callback_mutex. | |
2108 | 2102 | * If no ancestor is mem_exclusive (an unusual configuration), then |
2109 | 2103 | * returns the root cpuset. |
2110 | 2104 | */ |
2111 | 2105 | |
... | ... | @@ -2131,12 +2125,12 @@ |
2131 | 2125 | * GFP_KERNEL allocations are not so marked, so can escape to the |
2132 | 2126 | * nearest mem_exclusive ancestor cpuset. |
2133 | 2127 | * |
2134 | - * Scanning up parent cpusets requires callback_sem. The __alloc_pages() | |
2128 | + * Scanning up parent cpusets requires callback_mutex. The __alloc_pages() | |
2135 | 2129 | * routine only calls here with __GFP_HARDWALL bit _not_ set if |
2136 | 2130 | * it's a GFP_KERNEL allocation, and all nodes in the current tasks |
2137 | 2131 | * mems_allowed came up empty on the first pass over the zonelist. |
2138 | 2132 | * So only GFP_KERNEL allocations, if all nodes in the cpuset are |
2139 | - * short of memory, might require taking the callback_sem semaphore. | |
2133 | + * short of memory, might require taking the callback_mutex mutex. | |
2140 | 2134 | * |
2141 | 2135 | * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() |
2142 | 2136 | * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing |
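A simplified sketch of the allocator-side behaviour this comment describes, loosely modeled on the __alloc_pages() zonelist walk (try_this_zone() is a placeholder for the real per-zone allocation, and details such as alloc_flags are elided): the first pass sets __GFP_HARDWALL, so cpuset_zone_allowed() stays within mems_allowed and never takes callback_mutex; a later GFP_KERNEL pass without __GFP_HARDWALL may fall into the mem_exclusive ancestor scan, which does take it.

    for (z = zonelist->zones; *z; z++) {
            if (!cpuset_zone_allowed(*z, gfp_mask))
                    continue;                   /* node not allowed for this request */
            page = try_this_zone(*z, order, gfp_mask);
            if (page)
                    return page;
    }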
... | ... | @@ -2171,31 +2165,31 @@ |
2171 | 2165 | return 1; |
2172 | 2166 | |
2173 | 2167 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ |
2174 | - down(&callback_sem); | |
2168 | + mutex_lock(&callback_mutex); | |
2175 | 2169 | |
2176 | 2170 | task_lock(current); |
2177 | 2171 | cs = nearest_exclusive_ancestor(current->cpuset); |
2178 | 2172 | task_unlock(current); |
2179 | 2173 | |
2180 | 2174 | allowed = node_isset(node, cs->mems_allowed); |
2181 | - up(&callback_sem); | |
2175 | + mutex_unlock(&callback_mutex); | |
2182 | 2176 | return allowed; |
2183 | 2177 | } |
2184 | 2178 | |
2185 | 2179 | /** |
2186 | 2180 | * cpuset_lock - lock out any changes to cpuset structures |
2187 | 2181 | * |
2188 | - * The out of memory (oom) code needs to lock down cpusets | |
2182 | + * The out of memory (oom) code needs to mutex_lock cpusets | |
2189 | 2183 | * from being changed while it scans the tasklist looking for a |
2190 | - * task in an overlapping cpuset. Expose callback_sem via this | |
2184 | + * task in an overlapping cpuset. Expose callback_mutex via this | |
2191 | 2185 | * cpuset_lock() routine, so the oom code can lock it, before |
2192 | 2186 | * locking the task list. The tasklist_lock is a spinlock, so |
2193 | - * must be taken inside callback_sem. | |
2187 | + * must be taken inside callback_mutex. | |
2194 | 2188 | */ |
2195 | 2189 | |
2196 | 2190 | void cpuset_lock(void) |
2197 | 2191 | { |
2198 | - down(&callback_sem); | |
2192 | + mutex_lock(&callback_mutex); | |
2199 | 2193 | } |
2200 | 2194 | |
2201 | 2195 | /** |
... | ... | @@ -2206,7 +2200,7 @@ |
2206 | 2200 | |
2207 | 2201 | void cpuset_unlock(void) |
2208 | 2202 | { |
2209 | - up(&callback_sem); | |
2203 | + mutex_unlock(&callback_mutex); | |
2210 | 2204 | } |
2211 | 2205 | |
2212 | 2206 | /** |
... | ... | @@ -2218,7 +2212,7 @@ |
2218 | 2212 | * determine if task @p's memory usage might impact the memory |
2219 | 2213 | * available to the current task. |
2220 | 2214 | * |
2221 | - * Call while holding callback_sem. | |
2215 | + * Call while holding callback_mutex. | |
2222 | 2216 | **/ |
2223 | 2217 | |
2224 | 2218 | int cpuset_excl_nodes_overlap(const struct task_struct *p) |
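The intended user of this pair is the OOM killer's task scan, loosely as sketched below (simplified from the mm/oom_kill.c usage): callback_mutex is taken via cpuset_lock() first, and only then the tasklist_lock spinlock, matching the ordering the comment above requires.

    cpuset_lock();                      /* takes callback_mutex */
    read_lock(&tasklist_lock);          /* spinlock, nested inside the mutex */
    for_each_process(p) {
            if (!cpuset_excl_nodes_overlap(p))
                    continue;           /* killing p would not free memory we can use */
            /* ... consider p as an OOM victim ... */
    }
    read_unlock(&tasklist_lock);
    cpuset_unlock();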
... | ... | @@ -2289,7 +2283,7 @@ |
2289 | 2283 | * - Used for /proc/<pid>/cpuset. |
2290 | 2284 | * - No need to task_lock(tsk) on this tsk->cpuset reference, as it |
2291 | 2285 | * doesn't really matter if tsk->cpuset changes after we read it, |
2292 | - * and we take manage_sem, keeping attach_task() from changing it | |
2286 | + * and we take manage_mutex, keeping attach_task() from changing it | |
2293 | 2287 | * anyway. |
2294 | 2288 | */ |
2295 | 2289 | |
... | ... | @@ -2305,7 +2299,7 @@ |
2305 | 2299 | return -ENOMEM; |
2306 | 2300 | |
2307 | 2301 | tsk = m->private; |
2308 | - down(&manage_sem); | |
2302 | + mutex_lock(&manage_mutex); | |
2309 | 2303 | cs = tsk->cpuset; |
2310 | 2304 | if (!cs) { |
2311 | 2305 | retval = -EINVAL; |
... | ... | @@ -2318,7 +2312,7 @@ |
2318 | 2312 | seq_puts(m, buf); |
2319 | 2313 | seq_putc(m, '\n'); |
2320 | 2314 | out: |
2321 | - up(&manage_sem); | |
2315 | + mutex_unlock(&manage_mutex); | |
2322 | 2316 | kfree(buf); |
2323 | 2317 | return retval; |
2324 | 2318 | } |