Commit 9c3f75cbd144014bea6af866a154cc2e73ab2287

Authored by Peter Zijlstra
Committed by Ingo Molnar
1 parent e6625fa48e

sched: Break out cpu_power from the sched_group structure

In order to prepare for non-unique sched_groups per domain, we need to
carry the cpu_power elsewhere, so put a level of indirection in.

Reported-and-tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-qkho2byuhe4482fuknss40ad@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
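
The description boils down to adding one pointer hop: the power fields move out of struct sched_group into a separately allocated struct sched_group_power that each group points to, so several group instances can later share a single power object. A minimal sketch of the before/after layout (illustrative only; the name struct sched_group_before is hypothetical, unrelated members are elided, and the real definitions are in the include/linux/sched.h hunk below):

	/* Before: every group carries its own copy of the power values. */
	struct sched_group_before {
		struct sched_group_before *next;	/* circular list */
		unsigned int cpu_power, cpu_power_orig;
		/* ... other members elided ... */
	};

	/* After: the power values sit behind a pointer, so multiple
	 * sched_group instances can eventually share one object. */
	struct sched_group_power {
		unsigned int power, power_orig;	/* SCHED_LOAD_SCALE == max power of one CPU */
	};

	struct sched_group {
		struct sched_group *next;		/* circular list */
		struct sched_group_power *sgp;		/* shared power data */
		/* ... other members elided ... */
	};

Accordingly, every group->cpu_power access in the scheduler becomes group->sgp->power, and the per-cpu allocation and teardown paths gain a matching sched_group_power object, as the hunks below show.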

Showing 3 changed files with 58 additions and 34 deletions

include/linux/sched.h
... ... @@ -893,16 +893,20 @@
893 893 return 0;
894 894 }
895 895  
896   -struct sched_group {
897   - struct sched_group *next; /* Must be a circular list */
898   - atomic_t ref;
899   -
  896 +struct sched_group_power {
900 897 /*
901 898 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
902 899 * single CPU.
903 900 */
904   - unsigned int cpu_power, cpu_power_orig;
  901 + unsigned int power, power_orig;
  902 +};
  903 +
  904 +struct sched_group {
  905 + struct sched_group *next; /* Must be a circular list */
  906 + atomic_t ref;
  907 +
905 908 unsigned int group_weight;
  909 + struct sched_group_power *sgp;
906 910  
907 911 /*
908 912 * The CPUs this group covers.

kernel/sched.c
... ... @@ -6557,7 +6557,7 @@
6557 6557 break;
6558 6558 }
6559 6559  
6560   - if (!group->cpu_power) {
  6560 + if (!group->sgp->power) {
6561 6561 printk(KERN_CONT "\n");
6562 6562 printk(KERN_ERR "ERROR: domain->cpu_power not "
6563 6563 "set\n");
6564 6564  
... ... @@ -6581,9 +6581,9 @@
6581 6581 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
6582 6582  
6583 6583 printk(KERN_CONT " %s", str);
6584   - if (group->cpu_power != SCHED_POWER_SCALE) {
  6584 + if (group->sgp->power != SCHED_POWER_SCALE) {
6585 6585 printk(KERN_CONT " (cpu_power = %d)",
6586   - group->cpu_power);
  6586 + group->sgp->power);
6587 6587 }
6588 6588  
6589 6589 group = group->next;
6590 6590  
... ... @@ -6777,8 +6777,10 @@
6777 6777 static void free_sched_domain(struct rcu_head *rcu)
6778 6778 {
6779 6779 struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
6780   - if (atomic_dec_and_test(&sd->groups->ref))
  6780 + if (atomic_dec_and_test(&sd->groups->ref)) {
  6781 + kfree(sd->groups->sgp);
6781 6782 kfree(sd->groups);
  6783 + }
6782 6784 kfree(sd);
6783 6785 }
6784 6786  
... ... @@ -6945,6 +6947,7 @@
6945 6947 struct sd_data {
6946 6948 struct sched_domain **__percpu sd;
6947 6949 struct sched_group **__percpu sg;
  6950 + struct sched_group_power **__percpu sgp;
6948 6951 };
6949 6952  
6950 6953 struct s_data {
6951 6954  
... ... @@ -6981,8 +6984,10 @@
6981 6984 if (child)
6982 6985 cpu = cpumask_first(sched_domain_span(child));
6983 6986  
6984   - if (sg)
  6987 + if (sg) {
6985 6988 *sg = *per_cpu_ptr(sdd->sg, cpu);
  6989 + (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
  6990 + }
6986 6991  
6987 6992 return cpu;
6988 6993 }
... ... @@ -7020,7 +7025,7 @@
7020 7025 continue;
7021 7026  
7022 7027 cpumask_clear(sched_group_cpus(sg));
7023   - sg->cpu_power = 0;
  7028 + sg->sgp->power = 0;
7024 7029  
7025 7030 for_each_cpu(j, span) {
7026 7031 if (get_group(j, sdd, NULL) != group)
... ... @@ -7185,6 +7190,7 @@
7185 7190 if (cpu == cpumask_first(sched_group_cpus(sg))) {
7186 7191 WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
7187 7192 *per_cpu_ptr(sdd->sg, cpu) = NULL;
  7193 + *per_cpu_ptr(sdd->sgp, cpu) = NULL;
7188 7194 }
7189 7195 }
7190 7196  
7191 7197  
... ... @@ -7234,9 +7240,14 @@
7234 7240 if (!sdd->sg)
7235 7241 return -ENOMEM;
7236 7242  
  7243 + sdd->sgp = alloc_percpu(struct sched_group_power *);
  7244 + if (!sdd->sgp)
  7245 + return -ENOMEM;
  7246 +
7237 7247 for_each_cpu(j, cpu_map) {
7238 7248 struct sched_domain *sd;
7239 7249 struct sched_group *sg;
  7250 + struct sched_group_power *sgp;
7240 7251  
7241 7252 sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
7242 7253 GFP_KERNEL, cpu_to_node(j));
... ... @@ -7251,6 +7262,13 @@
7251 7262 return -ENOMEM;
7252 7263  
7253 7264 *per_cpu_ptr(sdd->sg, j) = sg;
  7265 +
  7266 + sgp = kzalloc_node(sizeof(struct sched_group_power),
  7267 + GFP_KERNEL, cpu_to_node(j));
  7268 + if (!sgp)
  7269 + return -ENOMEM;
  7270 +
  7271 + *per_cpu_ptr(sdd->sgp, j) = sgp;
7254 7272 }
7255 7273 }
7256 7274  
7257 7275  
... ... @@ -7268,9 +7286,11 @@
7268 7286 for_each_cpu(j, cpu_map) {
7269 7287 kfree(*per_cpu_ptr(sdd->sd, j));
7270 7288 kfree(*per_cpu_ptr(sdd->sg, j));
  7289 + kfree(*per_cpu_ptr(sdd->sgp, j));
7271 7290 }
7272 7291 free_percpu(sdd->sd);
7273 7292 free_percpu(sdd->sg);
  7293 + free_percpu(sdd->sgp);
7274 7294 }
7275 7295 }
7276 7296  

kernel/sched_fair.c
... ... @@ -1585,7 +1585,7 @@
1585 1585 }
1586 1586  
1587 1587 /* Adjust by relative CPU power of the group */
1588   - avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
  1588 + avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
1589 1589  
1590 1590 if (local_group) {
1591 1591 this_load = avg_load;
... ... @@ -2631,7 +2631,7 @@
2631 2631 power >>= SCHED_POWER_SHIFT;
2632 2632 }
2633 2633  
2634   - sdg->cpu_power_orig = power;
  2634 + sdg->sgp->power_orig = power;
2635 2635  
2636 2636 if (sched_feat(ARCH_POWER))
2637 2637 power *= arch_scale_freq_power(sd, cpu);
... ... @@ -2647,7 +2647,7 @@
2647 2647 power = 1;
2648 2648  
2649 2649 cpu_rq(cpu)->cpu_power = power;
2650   - sdg->cpu_power = power;
  2650 + sdg->sgp->power = power;
2651 2651 }
2652 2652  
2653 2653 static void update_group_power(struct sched_domain *sd, int cpu)
2654 2654  
... ... @@ -2665,11 +2665,11 @@
2665 2665  
2666 2666 group = child->groups;
2667 2667 do {
2668   - power += group->cpu_power;
  2668 + power += group->sgp->power;
2669 2669 group = group->next;
2670 2670 } while (group != child->groups);
2671 2671  
2672   - sdg->cpu_power = power;
  2672 + sdg->sgp->power = power;
2673 2673 }
2674 2674  
2675 2675 /*
... ... @@ -2691,7 +2691,7 @@
2691 2691 /*
2692 2692 * If ~90% of the cpu_power is still there, we're good.
2693 2693 */
2694   - if (group->cpu_power * 32 > group->cpu_power_orig * 29)
  2694 + if (group->sgp->power * 32 > group->sgp->power_orig * 29)
2695 2695 return 1;
2696 2696  
2697 2697 return 0;
... ... @@ -2771,7 +2771,7 @@
2771 2771 }
2772 2772  
2773 2773 /* Adjust by relative CPU power of the group */
2774   - sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
  2774 + sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
2775 2775  
2776 2776 /*
2777 2777 * Consider the group unbalanced when the imbalance is larger
... ... @@ -2788,7 +2788,7 @@
2788 2788 if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
2789 2789 sgs->group_imb = 1;
2790 2790  
2791   - sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
  2791 + sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
2792 2792 SCHED_POWER_SCALE);
2793 2793 if (!sgs->group_capacity)
2794 2794 sgs->group_capacity = fix_small_capacity(sd, group);
... ... @@ -2877,7 +2877,7 @@
2877 2877 return;
2878 2878  
2879 2879 sds->total_load += sgs.group_load;
2880   - sds->total_pwr += sg->cpu_power;
  2880 + sds->total_pwr += sg->sgp->power;
2881 2881  
2882 2882 /*
2883 2883 * In case the child domain prefers tasks go to siblings
... ... @@ -2962,7 +2962,7 @@
2962 2962 if (this_cpu > busiest_cpu)
2963 2963 return 0;
2964 2964  
2965   - *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
  2965 + *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
2966 2966 SCHED_POWER_SCALE);
2967 2967 return 1;
2968 2968 }
... ... @@ -2993,7 +2993,7 @@
2993 2993  
2994 2994 scaled_busy_load_per_task = sds->busiest_load_per_task
2995 2995 * SCHED_POWER_SCALE;
2996   - scaled_busy_load_per_task /= sds->busiest->cpu_power;
  2996 + scaled_busy_load_per_task /= sds->busiest->sgp->power;
2997 2997  
2998 2998 if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
2999 2999 (scaled_busy_load_per_task * imbn)) {
3000 3000  
3001 3001  
3002 3002  
3003 3003  
3004 3004  
3005 3005  
... ... @@ -3007,28 +3007,28 @@
3007 3007 * moving them.
3008 3008 */
3009 3009  
3010   - pwr_now += sds->busiest->cpu_power *
  3010 + pwr_now += sds->busiest->sgp->power *
3011 3011 min(sds->busiest_load_per_task, sds->max_load);
3012   - pwr_now += sds->this->cpu_power *
  3012 + pwr_now += sds->this->sgp->power *
3013 3013 min(sds->this_load_per_task, sds->this_load);
3014 3014 pwr_now /= SCHED_POWER_SCALE;
3015 3015  
3016 3016 /* Amount of load we'd subtract */
3017 3017 tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
3018   - sds->busiest->cpu_power;
  3018 + sds->busiest->sgp->power;
3019 3019 if (sds->max_load > tmp)
3020   - pwr_move += sds->busiest->cpu_power *
  3020 + pwr_move += sds->busiest->sgp->power *
3021 3021 min(sds->busiest_load_per_task, sds->max_load - tmp);
3022 3022  
3023 3023 /* Amount of load we'd add */
3024   - if (sds->max_load * sds->busiest->cpu_power <
  3024 + if (sds->max_load * sds->busiest->sgp->power <
3025 3025 sds->busiest_load_per_task * SCHED_POWER_SCALE)
3026   - tmp = (sds->max_load * sds->busiest->cpu_power) /
3027   - sds->this->cpu_power;
  3026 + tmp = (sds->max_load * sds->busiest->sgp->power) /
  3027 + sds->this->sgp->power;
3028 3028 else
3029 3029 tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
3030   - sds->this->cpu_power;
3031   - pwr_move += sds->this->cpu_power *
  3030 + sds->this->sgp->power;
  3031 + pwr_move += sds->this->sgp->power *
3032 3032 min(sds->this_load_per_task, sds->this_load + tmp);
3033 3033 pwr_move /= SCHED_POWER_SCALE;
3034 3034  
... ... @@ -3074,7 +3074,7 @@
3074 3074  
3075 3075 load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
3076 3076  
3077   - load_above_capacity /= sds->busiest->cpu_power;
  3077 + load_above_capacity /= sds->busiest->sgp->power;
3078 3078 }
3079 3079  
3080 3080 /*
... ... @@ -3090,8 +3090,8 @@
3090 3090 max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
3091 3091  
3092 3092 /* How much load to actually move to equalise the imbalance */
3093   - *imbalance = min(max_pull * sds->busiest->cpu_power,
3094   - (sds->avg_load - sds->this_load) * sds->this->cpu_power)
  3093 + *imbalance = min(max_pull * sds->busiest->sgp->power,
  3094 + (sds->avg_load - sds->this_load) * sds->this->sgp->power)
3095 3095 / SCHED_POWER_SCALE;
3096 3096  
3097 3097 /*