Commit 3461b0af025251bbc6b3d56c821c6ac2de6f7209

Authored by Mike Travis
Committed by Ingo Molnar
1 parent 9f248bde9d

x86: remove static boot_cpu_pda array v2

* Remove the boot_cpu_pda array and pointer table from the data section.
    Allocate the pointer table and array during init instead.  do_boot_cpu()
    will reallocate the pda in node-local memory and, if the cpu is being
    brought up before the bootmem array is released (after_bootmem == 0),
    it will then free the initial pda.  This will happen for all cpus
    present at system startup.

    This removes 512k + 32k bytes from the data section.

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Showing 6 changed files with 135 additions and 38 deletions Side-by-side Diff

arch/x86/kernel/head64.c
... ... @@ -25,6 +25,24 @@
25 25 #include <asm/e820.h>
26 26 #include <asm/bios_ebda.h>
27 27  
  28 +/* statically allocated pda for the boot cpu; installed as cpu_pda(0) before pda_init(0) is called */
  29 +static struct x8664_pda _boot_cpu_pda __read_mostly;
  30 +
  31 +#ifdef CONFIG_SMP
  32 +#ifdef CONFIG_DEBUG_PER_CPU_MAPS
  33 +/*
  34 + * We install an empty cpu_pda pointer table to trap references before
  35 + * the actual cpu_pda pointer table is created in setup_cpu_pda_map().
  36 + */
  37 +static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
  38 +#else
  39 +static struct x8664_pda *__cpu_pda[1] __read_mostly;
  40 +#endif
  41 +
  42 +#else /* !CONFIG_SMP (NR_CPUS will be 1) */
  43 +static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
  44 +#endif
  45 +
28 46 static void __init zap_identity_mappings(void)
29 47 {
30 48 pgd_t *pgd = pgd_offset_k(0UL);
31 49  
... ... @@ -156,10 +174,12 @@
156 174  
157 175 early_printk("Kernel alive\n");
158 176  
159   - for (i = 0; i < NR_CPUS; i++)
160   - cpu_pda(i) = &boot_cpu_pda[i];
161   -
  177 + _cpu_pda = __cpu_pda;
  178 + cpu_pda(0) = &_boot_cpu_pda;
162 179 pda_init(0);
  180 +
  181 + early_printk("Kernel really alive\n");
  182 +
163 183 copy_bootdata(__va(real_mode_data));
164 184  
165 185 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
arch/x86/kernel/setup.c
... ... @@ -101,6 +101,50 @@
101 101 */
102 102 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
103 103 EXPORT_SYMBOL(__per_cpu_offset);
  104 +static inline void setup_cpu_pda_map(void) { }
  105 +
  106 +#elif !defined(CONFIG_SMP)
  107 +static inline void setup_cpu_pda_map(void) { }
  108 +
  109 +#else /* CONFIG_SMP && CONFIG_X86_64 */
  110 +
  111 +/*
  112 + * Allocate the cpu_pda pointer table and the pda array for the non-boot cpus in a single alloc_bootmem() call, then switch _cpu_pda over to the new table.
  113 + */
  114 +static void __init setup_cpu_pda_map(void)
  115 +{
  116 + char *pda;
  117 + struct x8664_pda **new_cpu_pda;
  118 + unsigned long size;
  119 + int cpu;
  120 +
  121 + size = roundup(sizeof(struct x8664_pda), cache_line_size());
  122 +
  123 + /* one allocation: pointer table (nr_cpu_ids entries, cache-line rounded) followed by nr_cpu_ids - 1 cache-line-rounded pda's */
  124 + {
  125 + unsigned long tsize = nr_cpu_ids * sizeof(void *);
  126 + unsigned long asize = size * (nr_cpu_ids - 1);
  127 +
  128 + tsize = roundup(tsize, cache_line_size());
  129 + new_cpu_pda = alloc_bootmem(tsize + asize);
  130 + pda = (char *)new_cpu_pda + tsize;
  131 + }
  132 +
  133 + /* point each table entry at its pda in the new bootmem array and mark it in_bootmem */
  134 + for_each_possible_cpu(cpu) {
  135 + if (cpu == 0) {
  136 + /* boot cpu keeps the pda it is already using; no array slot is consumed */
  137 + new_cpu_pda[0] = cpu_pda(0);
  138 + continue;
  139 + }
  140 + new_cpu_pda[cpu] = (struct x8664_pda *)pda;
  141 + new_cpu_pda[cpu]->in_bootmem = 1;
  142 + pda += size;
  143 + }
  144 +
  145 + /* switch _cpu_pda over to the new pointer table */
  146 + _cpu_pda = new_cpu_pda;
  147 +}
104 148 #endif
105 149  
106 150 /*
107 151  
108 152  
109 153  
110 154  
111 155  
112 156  
113 157  
114 158  
... ... @@ -110,46 +154,43 @@
110 154 */
111 155 void __init setup_per_cpu_areas(void)
112 156 {
113   - int i, highest_cpu = 0;
114   - unsigned long size;
  157 + ssize_t size = PERCPU_ENOUGH_ROOM;
  158 + char *ptr;
  159 + int cpu;
115 160  
116 161 #ifdef CONFIG_HOTPLUG_CPU
117 162 prefill_possible_map();
  163 +#else
  164 + nr_cpu_ids = num_processors;
118 165 #endif
119 166  
  167 + /* Setup cpu_pda map */
  168 + setup_cpu_pda_map();
  169 +
120 170 /* Copy section for each CPU (we discard the original) */
121 171 size = PERCPU_ENOUGH_ROOM;
122 172 printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
123 173 size);
124 174  
125   - for_each_possible_cpu(i) {
126   - char *ptr;
  175 + for_each_possible_cpu(cpu) {
127 176 #ifndef CONFIG_NEED_MULTIPLE_NODES
128 177 ptr = alloc_bootmem_pages(size);
129 178 #else
130   - int node = early_cpu_to_node(i);
  179 + int node = early_cpu_to_node(cpu);
131 180 if (!node_online(node) || !NODE_DATA(node)) {
132 181 ptr = alloc_bootmem_pages(size);
133 182 printk(KERN_INFO
134 183 "cpu %d has no node %d or node-local memory\n",
135   - i, node);
  184 + cpu, node);
136 185 }
137 186 else
138 187 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
139 188 #endif
140   - if (!ptr)
141   - panic("Cannot allocate cpu data for CPU %d\n", i);
142   -#ifdef CONFIG_X86_64
143   - cpu_pda(i)->data_offset = ptr - __per_cpu_start;
144   -#else
145   - __per_cpu_offset[i] = ptr - __per_cpu_start;
146   -#endif
  189 + per_cpu_offset(cpu) = ptr - __per_cpu_start;
147 190 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
148 191  
149   - highest_cpu = i;
150 192 }
151 193  
152   - nr_cpu_ids = highest_cpu + 1;
153 194 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
154 195 NR_CPUS, nr_cpu_ids, nr_node_ids);
155 196  
... ... @@ -199,7 +240,7 @@
199 240 {
200 241 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
201 242  
202   - if (node != NUMA_NO_NODE)
  243 + if (cpu_pda(cpu) && node != NUMA_NO_NODE)
203 244 cpu_pda(cpu)->nodenumber = node;
204 245  
205 246 if (cpu_to_node_map)
arch/x86/kernel/setup64.c
... ... @@ -12,6 +12,7 @@
12 12 #include <linux/bitops.h>
13 13 #include <linux/module.h>
14 14 #include <linux/kgdb.h>
  15 +#include <linux/topology.h>
15 16 #include <asm/pda.h>
16 17 #include <asm/pgtable.h>
17 18 #include <asm/processor.h>
18 19  
... ... @@ -34,9 +35,8 @@
34 35  
35 36 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
36 37  
37   -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
  38 +struct x8664_pda **_cpu_pda __read_mostly;
38 39 EXPORT_SYMBOL(_cpu_pda);
39   -struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
40 40  
41 41 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
42 42  
43 43  
... ... @@ -114,8 +114,10 @@
114 114 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
115 115 if (!pda->irqstackptr)
116 116 panic("cannot allocate irqstack for cpu %d", cpu);
117   - }
118 117  
  118 + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
  119 + pda->nodenumber = cpu_to_node(cpu);
  120 + }
119 121  
120 122 pda->irqstackptr += IRQSTACKSIZE-64;
121 123 }
arch/x86/kernel/smpboot.c
... ... @@ -816,6 +816,43 @@
816 816 complete(&c_idle->done);
817 817 }
818 818  
  819 +/*
  820 + * Allocate node local memory for the AP pda, copying over and replacing any existing bootmem pda.
  821 + *
  822 + * Must be called after the _cpu_pda pointer table is initialized.  Returns 0 if the cpu ends up with a usable pda, -1 if it has none and the allocation failed.
  823 + */
  824 +static int __cpuinit get_local_pda(int cpu)
  825 +{
  826 + struct x8664_pda *oldpda, *newpda;
  827 + unsigned long size = sizeof(struct x8664_pda);
  828 + int node = cpu_to_node(cpu);
  829 +
  830 + if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
  831 + return 0;
  832 +
  833 + oldpda = cpu_pda(cpu);
  834 + newpda = kmalloc_node(size, GFP_ATOMIC, node);
  835 + if (!newpda) {
  836 + printk(KERN_ERR "Could not allocate node local PDA "
  837 + "for CPU %d on node %d\n", cpu, node);
  838 +
  839 + if (oldpda)
  840 + return 0; /* allocation failed, but the existing pda is still usable */
  841 + else
  842 + return -1;
  843 + }
  844 +
  845 + if (oldpda) {
  846 + memcpy(newpda, oldpda, size);
  847 + if (!after_bootmem)
  848 + free_bootmem((unsigned long)oldpda, size);
  849 + }
  850 +
  851 + newpda->in_bootmem = 0;
  852 + cpu_pda(cpu) = newpda;
  853 + return 0;
  854 +}
  855 +
819 856 static int __cpuinit do_boot_cpu(int apicid, int cpu)
820 857 /*
821 858 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
... ... @@ -841,19 +878,11 @@
841 878 }
842 879  
843 880 /* Allocate node local memory for AP pdas */
844   - if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
845   - struct x8664_pda *newpda, *pda;
846   - int node = cpu_to_node(cpu);
847   - pda = cpu_pda(cpu);
848   - newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
849   - node);
850   - if (newpda) {
851   - memcpy(newpda, pda, sizeof(struct x8664_pda));
852   - cpu_pda(cpu) = newpda;
853   - } else
854   - printk(KERN_ERR
855   - "Could not allocate node local PDA for CPU %d on node %d\n",
856   - cpu, node);
  881 + if (cpu > 0) {
  882 + boot_error = get_local_pda(cpu);
  883 + if (boot_error)
  884 + goto restore_state;
  885 + /* if can't get pda memory, can't start cpu */
857 886 }
858 887 #endif
859 888  
... ... @@ -972,6 +1001,8 @@
972 1001 }
973 1002 }
974 1003  
  1004 +restore_state:
  1005 +
975 1006 if (boot_error) {
976 1007 /* Try to put things back the way they were before ... */
977 1008 unmap_cpu_to_logical_apicid(cpu);
... ... @@ -1347,6 +1378,8 @@
1347 1378  
1348 1379 for (i = 0; i < possible; i++)
1349 1380 cpu_set(i, cpu_possible_map);
  1381 +
  1382 + nr_cpu_ids = possible;
1350 1383 }
1351 1384  
1352 1385 static void __ref remove_cpu_from_maps(int cpu)
include/asm-x86/pda.h
... ... @@ -22,7 +22,8 @@
22 22 offset 40!!! */
23 23 #endif
24 24 char *irqstackptr;
25   - int nodenumber; /* number of current node */
  25 + short nodenumber; /* number of current node (32k max) */
  26 + short in_bootmem; /* pda lives in bootmem */
26 27 unsigned int __softirq_pending;
27 28 unsigned int __nmi_count; /* number of NMI on this CPUs */
28 29 short mmu_state;
... ... @@ -38,8 +39,7 @@
38 39 unsigned irq_spurious_count;
39 40 } ____cacheline_aligned_in_smp;
40 41  
41   -extern struct x8664_pda *_cpu_pda[];
42   -extern struct x8664_pda boot_cpu_pda[];
  42 +extern struct x8664_pda **_cpu_pda;
43 43 extern void pda_init(int);
44 44  
45 45 #define cpu_pda(i) (_cpu_pda[i])
... ... @@ -1024,6 +1024,7 @@
1024 1024 extern void show_mem(void);
1025 1025 extern void si_meminfo(struct sysinfo * val);
1026 1026 extern void si_meminfo_node(struct sysinfo *val, int nid);
  1027 +extern int after_bootmem;
1027 1028  
1028 1029 #ifdef CONFIG_NUMA
1029 1030 extern void setup_per_cpu_pageset(void);