Commit 4301065920b0cbde3986519582347e883b166f3e

Authored by Peter Williams
Committed by Ingo Molnar
1 parent f1a438d813

sched: simplify move_tasks()

The move_tasks() function is currently multiplexed with two distinct
capabilities:

1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another (the pre-patch prototype just below shows the parameter that
serves each capability).
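
For reference, the pre-patch move_tasks() declaration, condensed from the
kernel/sched.c hunk in the diff below; max_nr_move serves capability 2 and
max_load_move serves capability 1:

   /* Pre-patch: one function, two limits (task count and weighted load). */
   static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                         unsigned long max_nr_move, unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
                         int *all_pinned);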

The first of these capabilities is used in two places, load_balance() and
load_balance_newidle(); in both cases the return value of move_tasks() is
used purely to decide whether any tasks/load were moved, and no notice is
taken of the actual number of tasks moved.

The second capability is used in exactly one place, active_load_balance(),
to attempt to move exactly one task; as before, the return value is used
only as an indicator of success or failure.

This multiplexing of move_tasks() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative, one
function to move a specified load and one to move a single task, would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()).  However, the modular design
of the new CFS scheduler allows a simpler solution to be adopted, and
this patch implements that solution by:

1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single-purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure
(both functions are shown below, condensed from the diff).
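
Condensed from the kernel/sched.c hunks in the diff below, the two
functions after this patch look as follows (comments trimmed):

   /* Moves up to max_load_move weighted load from busiest to this_rq.
    * Returns 1 if any load was moved, 0 otherwise. */
   static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                         unsigned long max_load_move,
                         struct sched_domain *sd, enum cpu_idle_type idle,
                         int *all_pinned)
   {
           struct sched_class *class = sched_class_highest;
           unsigned long total_load_moved = 0;

           do {
                   total_load_moved +=
                           class->load_balance(this_rq, this_cpu, busiest,
                                   ULONG_MAX, max_load_move - total_load_moved,
                                   sd, idle, all_pinned);
                   class = class->next;
           } while (class && max_load_move > total_load_moved);

           return total_load_moved > 0;
   }

   /* Moves exactly one task from busiest to this_rq.
    * Returns 1 if a task was moved, 0 otherwise. */
   static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
                            struct sched_domain *sd, enum cpu_idle_type idle)
   {
           struct sched_class *class;

           for (class = sched_class_highest; class; class = class->next)
                   if (class->load_balance(this_rq, this_cpu, busiest,
                                           1, ULONG_MAX, sd, idle, NULL))
                           return 1;

           return 0;
   }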

One of the consequences of these changes is that neither move_one_task()
nor the new move_tasks() cares how many tasks sched_class.load_balance()
moves, which allows its interface to be simplified: it now returns the
amount of load moved as its result, and the load_moved pointer is removed
from the argument list.  This helps simplify the new move_tasks() and
slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.
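
In include/linux/sched.h (first hunk below) the method's prototype
changes accordingly:

   /* Before: returned the number of tasks moved; reported load via a pointer. */
   int (*load_balance) (struct rq *this_rq, int this_cpu,
                        struct rq *busiest,
                        unsigned long max_nr_move, unsigned long max_load_move,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *all_pinned, unsigned long *total_load_moved);

   /* After: returns the amount of weighted load moved directly. */
   unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
                        struct rq *busiest,
                        unsigned long max_nr_move, unsigned long max_load_move,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *all_pinned);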

Further simplifications, e.g. changes to balance_tasks(), are possible
but are (slightly) complicated by the special needs of load_balance_fair(),
so I've left them to a later patch (if this one gets accepted).

NB: Since move_tasks() is called with two run queue locks held, even
small reductions in overhead are worthwhile.
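
The locked region in question, as it appears in load_balance() after this
patch (taken from the diff below):

   local_irq_save(flags);
   double_rq_lock(this_rq, busiest);
   ld_moved = move_tasks(this_rq, this_cpu, busiest,
                         imbalance, sd, idle, &all_pinned);
   double_rq_unlock(this_rq, busiest);
   local_irq_restore(flags);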

[ mingo@elte.hu ]

this change also reduces code size nicely:

   text    data     bss     dec     hex filename
   39216    3618      24   42858    a76a sched.o.before
   39173    3618      24   42815    a73f sched.o.after

Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 5 changed files with 58 additions and 49 deletions

include/linux/sched.h
... ... @@ -866,11 +866,11 @@
866 866 struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
867 867 void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
868 868  
869   - int (*load_balance) (struct rq *this_rq, int this_cpu,
  869 + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
870 870 struct rq *busiest,
871 871 unsigned long max_nr_move, unsigned long max_load_move,
872 872 struct sched_domain *sd, enum cpu_idle_type idle,
873   - int *all_pinned, unsigned long *total_load_moved);
  873 + int *all_pinned);
874 874  
875 875 void (*set_curr_task) (struct rq *rq);
876 876 void (*task_tick) (struct rq *rq, struct task_struct *p);
kernel/sched.c
... ... @@ -2231,35 +2231,52 @@
2231 2231 }
2232 2232  
2233 2233 /*
2234   - * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
2235   - * load from busiest to this_rq, as part of a balancing operation within
2236   - * "domain". Returns the number of tasks moved.
  2234 + * move_tasks tries to move up to max_load_move weighted load from busiest to
  2235 + * this_rq, as part of a balancing operation within domain "sd".
  2236 + * Returns 1 if successful and 0 otherwise.
2237 2237 *
2238 2238 * Called with both runqueues locked.
2239 2239 */
2240 2240 static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2241   - unsigned long max_nr_move, unsigned long max_load_move,
  2241 + unsigned long max_load_move,
2242 2242 struct sched_domain *sd, enum cpu_idle_type idle,
2243 2243 int *all_pinned)
2244 2244 {
2245 2245 struct sched_class *class = sched_class_highest;
2246   - unsigned long load_moved, total_nr_moved = 0, nr_moved;
2247   - long rem_load_move = max_load_move;
  2246 + unsigned long total_load_moved = 0;
2248 2247  
2249 2248 do {
2250   - nr_moved = class->load_balance(this_rq, this_cpu, busiest,
2251   - max_nr_move, (unsigned long)rem_load_move,
2252   - sd, idle, all_pinned, &load_moved);
2253   - total_nr_moved += nr_moved;
2254   - max_nr_move -= nr_moved;
2255   - rem_load_move -= load_moved;
  2249 + total_load_moved +=
  2250 + class->load_balance(this_rq, this_cpu, busiest,
  2251 + ULONG_MAX, max_load_move - total_load_moved,
  2252 + sd, idle, all_pinned);
2256 2253 class = class->next;
2257   - } while (class && max_nr_move && rem_load_move > 0);
  2254 + } while (class && max_load_move > total_load_moved);
2258 2255  
2259   - return total_nr_moved;
  2256 + return total_load_moved > 0;
2260 2257 }
2261 2258  
2262 2259 /*
  2260 + * move_one_task tries to move exactly one task from busiest to this_rq, as
  2261 + * part of active balancing operations within "domain".
  2262 + * Returns 1 if successful and 0 otherwise.
  2263 + *
  2264 + * Called with both runqueues locked.
  2265 + */
  2266 +static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
  2267 + struct sched_domain *sd, enum cpu_idle_type idle)
  2268 +{
  2269 + struct sched_class *class;
  2270 +
  2271 + for (class = sched_class_highest; class; class = class->next)
  2272 + if (class->load_balance(this_rq, this_cpu, busiest,
  2273 + 1, ULONG_MAX, sd, idle, NULL))
  2274 + return 1;
  2275 +
  2276 + return 0;
  2277 +}
  2278 +
  2279 +/*
2263 2280 * find_busiest_group finds and returns the busiest CPU group within the
2264 2281 * domain. It calculates and returns the amount of weighted load which
2265 2282 * should be moved to restore balance via the imbalance parameter.
... ... @@ -2588,11 +2605,6 @@
2588 2605 */
2589 2606 #define MAX_PINNED_INTERVAL 512
2590 2607  
2591   -static inline unsigned long minus_1_or_zero(unsigned long n)
2592   -{
2593   - return n > 0 ? n - 1 : 0;
2594   -}
2595   -
2596 2608 /*
2597 2609 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2598 2610 * tasks if there is an imbalance.
... ... @@ -2601,7 +2613,7 @@
2601 2613 struct sched_domain *sd, enum cpu_idle_type idle,
2602 2614 int *balance)
2603 2615 {
2604   - int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
  2616 + int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2605 2617 struct sched_group *group;
2606 2618 unsigned long imbalance;
2607 2619 struct rq *busiest;
2608 2620  
2609 2621  
... ... @@ -2642,18 +2654,17 @@
2642 2654  
2643 2655 schedstat_add(sd, lb_imbalance[idle], imbalance);
2644 2656  
2645   - nr_moved = 0;
  2657 + ld_moved = 0;
2646 2658 if (busiest->nr_running > 1) {
2647 2659 /*
2648 2660 * Attempt to move tasks. If find_busiest_group has found
2649 2661 * an imbalance but busiest->nr_running <= 1, the group is
2650   - * still unbalanced. nr_moved simply stays zero, so it is
  2662 + * still unbalanced. ld_moved simply stays zero, so it is
2651 2663 * correctly treated as an imbalance.
2652 2664 */
2653 2665 local_irq_save(flags);
2654 2666 double_rq_lock(this_rq, busiest);
2655   - nr_moved = move_tasks(this_rq, this_cpu, busiest,
2656   - minus_1_or_zero(busiest->nr_running),
  2667 + ld_moved = move_tasks(this_rq, this_cpu, busiest,
2657 2668 imbalance, sd, idle, &all_pinned);
2658 2669 double_rq_unlock(this_rq, busiest);
2659 2670 local_irq_restore(flags);
... ... @@ -2661,7 +2672,7 @@
2661 2672 /*
2662 2673 * some other cpu did the load balance for us.
2663 2674 */
2664   - if (nr_moved && this_cpu != smp_processor_id())
  2675 + if (ld_moved && this_cpu != smp_processor_id())
2665 2676 resched_cpu(this_cpu);
2666 2677  
2667 2678 /* All tasks on this runqueue were pinned by CPU affinity */
... ... @@ -2673,7 +2684,7 @@
2673 2684 }
2674 2685 }
2675 2686  
2676   - if (!nr_moved) {
  2687 + if (!ld_moved) {
2677 2688 schedstat_inc(sd, lb_failed[idle]);
2678 2689 sd->nr_balance_failed++;
2679 2690  
2680 2691  
... ... @@ -2722,10 +2733,10 @@
2722 2733 sd->balance_interval *= 2;
2723 2734 }
2724 2735  
2725   - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
  2736 + if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2726 2737 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2727 2738 return -1;
2728   - return nr_moved;
  2739 + return ld_moved;
2729 2740  
2730 2741 out_balanced:
2731 2742 schedstat_inc(sd, lb_balanced[idle]);
... ... @@ -2757,7 +2768,7 @@
2757 2768 struct sched_group *group;
2758 2769 struct rq *busiest = NULL;
2759 2770 unsigned long imbalance;
2760   - int nr_moved = 0;
  2771 + int ld_moved = 0;
2761 2772 int sd_idle = 0;
2762 2773 int all_pinned = 0;
2763 2774 cpumask_t cpus = CPU_MASK_ALL;
2764 2775  
... ... @@ -2792,12 +2803,11 @@
2792 2803  
2793 2804 schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
2794 2805  
2795   - nr_moved = 0;
  2806 + ld_moved = 0;
2796 2807 if (busiest->nr_running > 1) {
2797 2808 /* Attempt to move tasks */
2798 2809 double_lock_balance(this_rq, busiest);
2799   - nr_moved = move_tasks(this_rq, this_cpu, busiest,
2800   - minus_1_or_zero(busiest->nr_running),
  2810 + ld_moved = move_tasks(this_rq, this_cpu, busiest,
2801 2811 imbalance, sd, CPU_NEWLY_IDLE,
2802 2812 &all_pinned);
2803 2813 spin_unlock(&busiest->lock);
... ... @@ -2809,7 +2819,7 @@
2809 2819 }
2810 2820 }
2811 2821  
2812   - if (!nr_moved) {
  2822 + if (!ld_moved) {
2813 2823 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
2814 2824 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2815 2825 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
... ... @@ -2817,7 +2827,7 @@
2817 2827 } else
2818 2828 sd->nr_balance_failed = 0;
2819 2829  
2820   - return nr_moved;
  2830 + return ld_moved;
2821 2831  
2822 2832 out_balanced:
2823 2833 schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
... ... @@ -2905,8 +2915,8 @@
2905 2915 if (likely(sd)) {
2906 2916 schedstat_inc(sd, alb_cnt);
2907 2917  
2908   - if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
2909   - ULONG_MAX, sd, CPU_IDLE, NULL))
  2918 + if (move_one_task(target_rq, target_cpu, busiest_rq,
  2919 + sd, CPU_IDLE))
2910 2920 schedstat_inc(sd, alb_pushed);
2911 2921 else
2912 2922 schedstat_inc(sd, alb_failed);
kernel/sched_fair.c
... ... @@ -944,11 +944,11 @@
944 944 return p->prio;
945 945 }
946 946  
947   -static int
  947 +static unsigned long
948 948 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
949 949 unsigned long max_nr_move, unsigned long max_load_move,
950 950 struct sched_domain *sd, enum cpu_idle_type idle,
951   - int *all_pinned, unsigned long *total_load_moved)
  951 + int *all_pinned)
952 952 {
953 953 struct cfs_rq *busy_cfs_rq;
954 954 unsigned long load_moved, total_nr_moved = 0, nr_moved;
... ... @@ -1006,9 +1006,7 @@
1006 1006 break;
1007 1007 }
1008 1008  
1009   - *total_load_moved = max_load_move - rem_load_move;
1010   -
1011   - return total_nr_moved;
  1009 + return max_load_move - rem_load_move;
1012 1010 }
1013 1011  
1014 1012 /*
kernel/sched_idletask.c
... ... @@ -37,11 +37,11 @@
37 37 {
38 38 }
39 39  
40   -static int
  40 +static unsigned long
41 41 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
42 42 unsigned long max_nr_move, unsigned long max_load_move,
43 43 struct sched_domain *sd, enum cpu_idle_type idle,
44   - int *all_pinned, unsigned long *total_load_moved)
  44 + int *all_pinned)
45 45 {
46 46 return 0;
47 47 }
kernel/sched_rt.c
... ... @@ -172,15 +172,16 @@
172 172 return p;
173 173 }
174 174  
175   -static int
  175 +static unsigned long
176 176 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
177 177 unsigned long max_nr_move, unsigned long max_load_move,
178 178 struct sched_domain *sd, enum cpu_idle_type idle,
179   - int *all_pinned, unsigned long *load_moved)
  179 + int *all_pinned)
180 180 {
181 181 int this_best_prio, best_prio, best_prio_seen = 0;
182 182 int nr_moved;
183 183 struct rq_iterator rt_rq_iterator;
  184 + unsigned long load_moved;
184 185  
185 186 best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
186 187 this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
187 188  
... ... @@ -203,11 +204,11 @@
203 204 rt_rq_iterator.arg = busiest;
204 205  
205 206 nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
206   - max_load_move, sd, idle, all_pinned, load_moved,
  207 + max_load_move, sd, idle, all_pinned, &load_moved,
207 208 this_best_prio, best_prio, best_prio_seen,
208 209 &rt_rq_iterator);
209 210  
210   - return nr_moved;
  211 + return load_moved;
211 212 }
212 213  
213 214 static void task_tick_rt(struct rq *rq, struct task_struct *p)