Commit 01a08546af311c065f34727787dd0cc8dc0c216f

Authored by Heiko Carstens
Committed by Ingo Molnar
1 parent f269893c57

sched: Add book scheduling domain

On top of the SMT and MC scheduling domains this adds the BOOK scheduling
domain. This is useful for NUMA-like machines that do not provide an
interface telling which piece of memory is attached to which node, or
where the hardware performs striping.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100831082844.253053798@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
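
The BOOK level only becomes active on architectures that select
CONFIG_SCHED_BOOK and supply two things: a cpu_book_mask() helper, which
the domain-building code below uses to span the book of a CPU, and an
SD_BOOK_INIT initializer, which the new check in include/linux/topology.h
enforces. A minimal, hypothetical <asm/topology.h> fragment might look as
follows; the cpu_book_map array and the reuse of the SD_CPU_INIT defaults
are illustrative assumptions, not part of this commit:

	/* Hypothetical arch-side hooks for the BOOK level (sketch only). */
	#ifdef CONFIG_SCHED_BOOK

	/* Set up by the arch topology code: the CPUs sharing a book with each CPU. */
	extern cpumask_t cpu_book_map[NR_CPUS];

	/* Used by kernel/sched.c to build the span of the book-level domain. */
	static inline const struct cpumask *cpu_book_mask(int cpu)
	{
		return &cpu_book_map[cpu];
	}

	/*
	 * include/linux/topology.h #errors out if this is missing; starting
	 * from the physical-package defaults is an assumption, not something
	 * mandated by this commit.
	 */
	#define SD_BOOK_INIT	SD_CPU_INIT

	#endif /* CONFIG_SCHED_BOOK */

Reusing the SD_CPU_INIT defaults is only a starting point; an architecture
would normally tune the balancing parameters to the cost of migrating tasks
across books.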

Showing 3 changed files with 82 additions and 2 deletions

include/linux/sched.h
... ... @@ -875,6 +875,7 @@
875 875 SD_LV_NONE = 0,
876 876 SD_LV_SIBLING,
877 877 SD_LV_MC,
  878 + SD_LV_BOOK,
878 879 SD_LV_CPU,
879 880 SD_LV_NODE,
880 881 SD_LV_ALLNODES,
include/linux/topology.h
... ... @@ -201,6 +201,12 @@
201 201 .balance_interval = 64, \
202 202 }
203 203  
  204 +#ifdef CONFIG_SCHED_BOOK
  205 +#ifndef SD_BOOK_INIT
  206 +#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
  207 +#endif
  208 +#endif /* CONFIG_SCHED_BOOK */
  209 +
204 210 #ifdef CONFIG_NUMA
205 211 #ifndef SD_NODE_INIT
206 212 #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
kernel/sched.c
... ... @@ -6506,6 +6506,7 @@
6506 6506 cpumask_var_t nodemask;
6507 6507 cpumask_var_t this_sibling_map;
6508 6508 cpumask_var_t this_core_map;
  6509 + cpumask_var_t this_book_map;
6509 6510 cpumask_var_t send_covered;
6510 6511 cpumask_var_t tmpmask;
6511 6512 struct sched_group **sched_group_nodes;
... ... @@ -6517,6 +6518,7 @@
6517 6518 sa_rootdomain,
6518 6519 sa_tmpmask,
6519 6520 sa_send_covered,
  6521 + sa_this_book_map,
6520 6522 sa_this_core_map,
6521 6523 sa_this_sibling_map,
6522 6524 sa_nodemask,
... ... @@ -6570,6 +6572,31 @@
6570 6572 }
6571 6573 #endif /* CONFIG_SCHED_MC */
6572 6574  
  6575 +/*
  6576 + * book sched-domains:
  6577 + */
  6578 +#ifdef CONFIG_SCHED_BOOK
  6579 +static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
  6580 +static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
  6581 +
  6582 +static int
  6583 +cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
  6584 + struct sched_group **sg, struct cpumask *mask)
  6585 +{
  6586 + int group = cpu;
  6587 +#ifdef CONFIG_SCHED_MC
  6588 + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
  6589 + group = cpumask_first(mask);
  6590 +#elif defined(CONFIG_SCHED_SMT)
  6591 + cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
  6592 + group = cpumask_first(mask);
  6593 +#endif
  6594 + if (sg)
  6595 + *sg = &per_cpu(sched_group_book, group).sg;
  6596 + return group;
  6597 +}
  6598 +#endif /* CONFIG_SCHED_BOOK */
  6599 +
6573 6600 static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
6574 6601 static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
6575 6602  
... ... @@ -6578,7 +6605,10 @@
6578 6605 struct sched_group **sg, struct cpumask *mask)
6579 6606 {
6580 6607 int group;
6581   -#ifdef CONFIG_SCHED_MC
  6608 +#ifdef CONFIG_SCHED_BOOK
  6609 + cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
  6610 + group = cpumask_first(mask);
  6611 +#elif defined(CONFIG_SCHED_MC)
6582 6612 cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
6583 6613 group = cpumask_first(mask);
6584 6614 #elif defined(CONFIG_SCHED_SMT)
... ... @@ -6839,6 +6869,9 @@
6839 6869 #ifdef CONFIG_SCHED_MC
6840 6870 SD_INIT_FUNC(MC)
6841 6871 #endif
  6872 +#ifdef CONFIG_SCHED_BOOK
  6873 + SD_INIT_FUNC(BOOK)
  6874 +#endif
6842 6875  
6843 6876 static int default_relax_domain_level = -1;
6844 6877  
... ... @@ -6888,6 +6921,8 @@
6888 6921 free_cpumask_var(d->tmpmask); /* fall through */
6889 6922 case sa_send_covered:
6890 6923 free_cpumask_var(d->send_covered); /* fall through */
  6924 + case sa_this_book_map:
  6925 + free_cpumask_var(d->this_book_map); /* fall through */
6891 6926 case sa_this_core_map:
6892 6927 free_cpumask_var(d->this_core_map); /* fall through */
6893 6928 case sa_this_sibling_map:
6894 6929  
... ... @@ -6934,8 +6969,10 @@
6934 6969 return sa_nodemask;
6935 6970 if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
6936 6971 return sa_this_sibling_map;
6937   - if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
  6972 + if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
6938 6973 return sa_this_core_map;
  6974 + if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
  6975 + return sa_this_book_map;
6939 6976 if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
6940 6977 return sa_send_covered;
6941 6978 d->rd = alloc_rootdomain();
... ... @@ -6993,6 +7030,23 @@
6993 7030 return sd;
6994 7031 }
6995 7032  
  7033 +static struct sched_domain *__build_book_sched_domain(struct s_data *d,
  7034 + const struct cpumask *cpu_map, struct sched_domain_attr *attr,
  7035 + struct sched_domain *parent, int i)
  7036 +{
  7037 + struct sched_domain *sd = parent;
  7038 +#ifdef CONFIG_SCHED_BOOK
  7039 + sd = &per_cpu(book_domains, i).sd;
  7040 + SD_INIT(sd, BOOK);
  7041 + set_domain_attribute(sd, attr);
  7042 + cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
  7043 + sd->parent = parent;
  7044 + parent->child = sd;
  7045 + cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
  7046 +#endif
  7047 + return sd;
  7048 +}
  7049 +
6996 7050 static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
6997 7051 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
6998 7052 struct sched_domain *parent, int i)
... ... @@ -7050,6 +7104,15 @@
7050 7104 d->send_covered, d->tmpmask);
7051 7105 break;
7052 7106 #endif
  7107 +#ifdef CONFIG_SCHED_BOOK
  7108 + case SD_LV_BOOK: /* set up book groups */
  7109 + cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
  7110 + if (cpu == cpumask_first(d->this_book_map))
  7111 + init_sched_build_groups(d->this_book_map, cpu_map,
  7112 + &cpu_to_book_group,
  7113 + d->send_covered, d->tmpmask);
  7114 + break;
  7115 +#endif
7053 7116 case SD_LV_CPU: /* set up physical groups */
7054 7117 cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
7055 7118 if (!cpumask_empty(d->nodemask))
7056 7119  
... ... @@ -7097,12 +7160,14 @@
7097 7160  
7098 7161 sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
7099 7162 sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
  7163 + sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
7100 7164 sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
7101 7165 sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
7102 7166 }
7103 7167  
7104 7168 for_each_cpu(i, cpu_map) {
7105 7169 build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
  7170 + build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
7106 7171 build_sched_groups(&d, SD_LV_MC, cpu_map, i);
7107 7172 }
7108 7173  
... ... @@ -7133,6 +7198,12 @@
7133 7198 init_sched_groups_power(i, sd);
7134 7199 }
7135 7200 #endif
  7201 +#ifdef CONFIG_SCHED_BOOK
  7202 + for_each_cpu(i, cpu_map) {
  7203 + sd = &per_cpu(book_domains, i).sd;
  7204 + init_sched_groups_power(i, sd);
  7205 + }
  7206 +#endif
7136 7207  
7137 7208 for_each_cpu(i, cpu_map) {
7138 7209 sd = &per_cpu(phys_domains, i).sd;
... ... @@ -7158,6 +7229,8 @@
7158 7229 sd = &per_cpu(cpu_domains, i).sd;
7159 7230 #elif defined(CONFIG_SCHED_MC)
7160 7231 sd = &per_cpu(core_domains, i).sd;
  7232 +#elif defined(CONFIG_SCHED_BOOK)
  7233 + sd = &per_cpu(book_domains, i).sd;
7161 7234 #else
7162 7235 sd = &per_cpu(phys_domains, i).sd;
7163 7236 #endif