Commit 0fc0531e0a2174377a86fd6953ecaa00287d8f70
Exists in master and in 7 other branches
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: update comments to reflect that percpu allocations are always zero-filled
  percpu: Optimize __get_cpu_var()
  x86, percpu: Optimize this_cpu_ptr
  percpu: clear memory allocated with the km allocator
  percpu: fix build breakage on s390 and cleanup build configuration tests
  percpu: use percpu allocator on UP too
  percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
  vmalloc: pcpu_get/free_vm_areas() aren't needed on UP

Fixed up trivial conflicts in include/linux/percpu.h
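Most of the merged patches rework the percpu accessor macros and the UP allocator. For orientation, here is a minimal usage sketch of the interface these patches optimize; it is not part of the commit, and my_hits, record_hit and total_hits are made-up names.

#include <linux/percpu.h>
#include <linux/cpumask.h>

/* One instance of the counter per possible CPU. */
static DEFINE_PER_CPU(unsigned long, my_hits);

static void record_hit(void)
{
	/*
	 * this_cpu_ptr() resolves to the current CPU's copy; the x86 and
	 * asm-generic changes below make this expansion cheaper.  Callers
	 * are expected to have preemption disabled here.
	 */
	(*this_cpu_ptr(&my_hits))++;
}

static unsigned long total_hits(void)
{
	unsigned long sum = 0;
	int cpu;

	/* per_cpu() addresses a specific CPU's copy. */
	for_each_possible_cpu(cpu)
		sum += per_cpu(my_hits, cpu);
	return sum;
}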
Showing 10 changed files
arch/x86/include/asm/percpu.h
... | ... | @@ -47,6 +47,20 @@ |
47 | 47 | #ifdef CONFIG_SMP |
48 | 48 | #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x |
49 | 49 | #define __my_cpu_offset percpu_read(this_cpu_off) |
50 | + | |
51 | +/* | |
52 | + * Compared to the generic __my_cpu_offset version, the following | |
53 | + * saves one instruction and avoids clobbering a temp register. | |
54 | + */ | |
55 | +#define __this_cpu_ptr(ptr) \ | |
56 | +({ \ | |
57 | + unsigned long tcp_ptr__; \ | |
58 | + __verify_pcpu_ptr(ptr); \ | |
59 | + asm volatile("add " __percpu_arg(1) ", %0" \ | |
60 | + : "=r" (tcp_ptr__) \ | |
61 | + : "m" (this_cpu_off), "0" (ptr)); \ | |
62 | + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ | |
63 | +}) | |
50 | 64 | #else |
51 | 65 | #define __percpu_arg(x) "%P" #x |
52 | 66 | #endif |
include/asm-generic/percpu.h
... | ... | @@ -55,14 +55,18 @@ |
55 | 55 | */ |
56 | 56 | #define per_cpu(var, cpu) \ |
57 | 57 | (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) |
58 | -#define __get_cpu_var(var) \ | |
59 | - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) | |
60 | -#define __raw_get_cpu_var(var) \ | |
61 | - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) | |
62 | 58 | |
63 | -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) | |
59 | +#ifndef __this_cpu_ptr | |
64 | 60 | #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) |
61 | +#endif | |
62 | +#ifdef CONFIG_DEBUG_PREEMPT | |
63 | +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) | |
64 | +#else | |
65 | +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) | |
66 | +#endif | |
65 | 67 | |
68 | +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) | |
69 | +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) | |
66 | 70 | |
67 | 71 | #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA |
68 | 72 | extern void setup_per_cpu_areas(void); |
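After this rework __get_cpu_var() is defined directly on top of this_cpu_ptr(), and the preemption-checked my_cpu_offset path is kept only under CONFIG_DEBUG_PREEMPT, so an arch that supplies an optimized __this_cpu_ptr (as x86 now does above) speeds up every __get_cpu_var() user for free. A small equivalence sketch follows; my_state and touch_state are made-up names, not from the commit.

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, my_state);

static void touch_state(void)
{
	/* With this patch the two statements below generate the same code. */
	__get_cpu_var(my_state) = 1;		/* old-style accessor ...          */
	*this_cpu_ptr(&my_state) = 1;		/* ... and what it now expands to */
}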
include/linux/percpu.h
... | ... | @@ -48,10 +48,8 @@ |
48 | 48 | preempt_enable(); \ |
49 | 49 | } while (0) |
50 | 50 | |
51 | -#ifdef CONFIG_SMP | |
52 | - | |
53 | 51 | /* minimum unit size, also is the maximum supported allocation size */ |
54 | -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) | |
52 | +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
55 | 53 | |
56 | 54 | /* |
57 | 55 | * Percpu allocator can serve percpu allocations before slab is |
58 | 56 | |
59 | 57 | |
60 | 58 | |
... | ... | @@ -146,36 +144,19 @@ |
146 | 144 | * dynamically allocated. Non-atomic access to the current CPU's |
147 | 145 | * version should probably be combined with get_cpu()/put_cpu(). |
148 | 146 | */ |
147 | +#ifdef CONFIG_SMP | |
149 | 148 | #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) |
149 | +#else | |
150 | +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) | |
151 | +#endif | |
150 | 152 | |
151 | 153 | extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); |
152 | 154 | extern bool is_kernel_percpu_address(unsigned long addr); |
153 | 155 | |
154 | -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | |
156 | +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) | |
155 | 157 | extern void __init setup_per_cpu_areas(void); |
156 | 158 | #endif |
157 | 159 | extern void __init percpu_init_late(void); |
158 | - | |
159 | -#else /* CONFIG_SMP */ | |
160 | - | |
161 | -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) | |
162 | - | |
163 | -/* can't distinguish from other static vars, always false */ | |
164 | -static inline bool is_kernel_percpu_address(unsigned long addr) | |
165 | -{ | |
166 | - return false; | |
167 | -} | |
168 | - | |
169 | -static inline void __init setup_per_cpu_areas(void) { } | |
170 | - | |
171 | -static inline void __init percpu_init_late(void) { } | |
172 | - | |
173 | -static inline void *pcpu_lpage_remapped(void *kaddr) | |
174 | -{ | |
175 | - return NULL; | |
176 | -} | |
177 | - | |
178 | -#endif /* CONFIG_SMP */ | |
179 | 160 | |
180 | 161 | extern void __percpu *__alloc_percpu(size_t size, size_t align); |
181 | 162 | extern void free_percpu(void __percpu *__pdata); |
include/linux/vmalloc.h
... | ... | @@ -117,11 +117,13 @@ |
117 | 117 | extern struct vm_struct *vmlist; |
118 | 118 | extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); |
119 | 119 | |
120 | +#ifdef CONFIG_SMP | |
120 | 121 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, |
121 | 122 | const size_t *sizes, int nr_vms, |
122 | 123 | size_t align, gfp_t gfp_mask); |
123 | 124 | |
124 | 125 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); |
126 | +#endif | |
125 | 127 | |
126 | 128 | #endif /* _LINUX_VMALLOC_H */ |
mm/Kconfig
... | ... | @@ -301,4 +301,12 @@ |
301 | 301 | of 1 says that all excess pages should be trimmed. |
302 | 302 | |
303 | 303 | See Documentation/nommu-mmap.txt for more information. |
304 | + | |
305 | +# | |
306 | +# UP and nommu archs use km based percpu allocator | |
307 | +# | |
308 | +config NEED_PER_CPU_KM | |
309 | + depends on !SMP | |
310 | + bool | |
311 | + default y |
mm/Makefile
... | ... | @@ -11,7 +11,7 @@ |
11 | 11 | maccess.o page_alloc.o page-writeback.o \ |
12 | 12 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
13 | 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
14 | - page_isolation.o mm_init.o mmu_context.o \ | |
14 | + page_isolation.o mm_init.o mmu_context.o percpu.o \ | |
15 | 15 | $(mmu-y) |
16 | 16 | obj-y += init-mm.o |
17 | 17 | |
... | ... | @@ -36,11 +36,6 @@ |
36 | 36 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
37 | 37 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
38 | 38 | obj-$(CONFIG_MIGRATION) += migrate.o |
39 | -ifdef CONFIG_SMP | |
40 | -obj-y += percpu.o | |
41 | -else | |
42 | -obj-y += percpu_up.o | |
43 | -endif | |
44 | 39 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
45 | 40 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o |
46 | 41 | obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o |
mm/percpu-km.c
... | ... | @@ -27,7 +27,7 @@ |
27 | 27 | * chunk size is not aligned. percpu-km code will whine about it. |
28 | 28 | */ |
29 | 29 | |
30 | -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | |
30 | +#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) | |
31 | 31 | #error "contiguous percpu allocation is incompatible with paged first chunk" |
32 | 32 | #endif |
33 | 33 | |
... | ... | @@ -35,7 +35,11 @@ |
35 | 35 | |
36 | 36 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) |
37 | 37 | { |
38 | - /* noop */ | |
38 | + unsigned int cpu; | |
39 | + | |
40 | + for_each_possible_cpu(cpu) | |
41 | + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | |
42 | + | |
39 | 43 | return 0; |
40 | 44 | } |
41 | 45 |
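pcpu_populate_chunk() in the km backend used to be a no-op, so space reused within an already allocated chunk could be handed out with stale contents. Clearing the populated region for every possible CPU restores the zero-fill guarantee that the updated kerneldoc comments in this merge document. A hypothetical caller that relies on that guarantee is sketched below; my_stats, stats and my_stats_init are made-up names.

#include <linux/percpu.h>

struct my_stats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
};

static struct my_stats __percpu *stats;

static int my_stats_init(void)
{
	stats = alloc_percpu(struct my_stats);
	if (!stats)
		return -ENOMEM;
	/* No memset here: every CPU's counters are relied on to start at 0. */
	return 0;
}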
mm/percpu.c
... | ... | @@ -76,6 +76,7 @@ |
76 | 76 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
77 | 77 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
78 | 78 | |
79 | +#ifdef CONFIG_SMP | |
79 | 80 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ |
80 | 81 | #ifndef __addr_to_pcpu_ptr |
81 | 82 | #define __addr_to_pcpu_ptr(addr) \ |
... | ... | @@ -89,6 +90,11 @@ |
89 | 90 | (unsigned long)pcpu_base_addr - \ |
90 | 91 | (unsigned long)__per_cpu_start) |
91 | 92 | #endif |
93 | +#else /* CONFIG_SMP */ | |
94 | +/* on UP, it's always identity mapped */ | |
95 | +#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) | |
96 | +#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) | |
97 | +#endif /* CONFIG_SMP */ | |
92 | 98 | |
93 | 99 | struct pcpu_chunk { |
94 | 100 | struct list_head list; /* linked to pcpu_slot lists */ |
... | ... | @@ -820,8 +826,8 @@ |
820 | 826 | * @size: size of area to allocate in bytes |
821 | 827 | * @align: alignment of area (max PAGE_SIZE) |
822 | 828 | * |
823 | - * Allocate percpu area of @size bytes aligned at @align. Might | |
824 | - * sleep. Might trigger writeouts. | |
829 | + * Allocate zero-filled percpu area of @size bytes aligned at @align. | |
830 | + * Might sleep. Might trigger writeouts. | |
825 | 831 | * |
826 | 832 | * CONTEXT: |
827 | 833 | * Does GFP_KERNEL allocation. |
... | ... | @@ -840,9 +846,10 @@ |
840 | 846 | * @size: size of area to allocate in bytes |
841 | 847 | * @align: alignment of area (max PAGE_SIZE) |
842 | 848 | * |
843 | - * Allocate percpu area of @size bytes aligned at @align from reserved | |
844 | - * percpu area if arch has set it up; otherwise, allocation is served | |
845 | - * from the same dynamic area. Might sleep. Might trigger writeouts. | |
849 | + * Allocate zero-filled percpu area of @size bytes aligned at @align | |
850 | + * from reserved percpu area if arch has set it up; otherwise, | |
851 | + * allocation is served from the same dynamic area. Might sleep. | |
852 | + * Might trigger writeouts. | |
846 | 853 | * |
847 | 854 | * CONTEXT: |
848 | 855 | * Does GFP_KERNEL allocation. |
... | ... | @@ -949,6 +956,7 @@ |
949 | 956 | */ |
950 | 957 | bool is_kernel_percpu_address(unsigned long addr) |
951 | 958 | { |
959 | +#ifdef CONFIG_SMP | |
952 | 960 | const size_t static_size = __per_cpu_end - __per_cpu_start; |
953 | 961 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); |
954 | 962 | unsigned int cpu; |
... | ... | @@ -959,6 +967,8 @@ |
959 | 967 | if ((void *)addr >= start && (void *)addr < start + static_size) |
960 | 968 | return true; |
961 | 969 | } |
970 | +#endif | |
971 | + /* on UP, can't distinguish from other static vars, always false */ | |
962 | 972 | return false; |
963 | 973 | } |
964 | 974 | |
... | ... | @@ -1067,161 +1077,6 @@ |
1067 | 1077 | } |
1068 | 1078 | |
1069 | 1079 | /** |
1070 | - * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | |
1071 | - * @reserved_size: the size of reserved percpu area in bytes | |
1072 | - * @dyn_size: minimum free size for dynamic allocation in bytes | |
1073 | - * @atom_size: allocation atom size | |
1074 | - * @cpu_distance_fn: callback to determine distance between cpus, optional | |
1075 | - * | |
1076 | - * This function determines grouping of units, their mappings to cpus | |
1077 | - * and other parameters considering needed percpu size, allocation | |
1078 | - * atom size and distances between CPUs. | |
1079 | - * | |
1080 | - * Groups are always mutliples of atom size and CPUs which are of | |
1081 | - * LOCAL_DISTANCE both ways are grouped together and share space for | |
1082 | - * units in the same group. The returned configuration is guaranteed | |
1083 | - * to have CPUs on different nodes on different groups and >=75% usage | |
1084 | - * of allocated virtual address space. | |
1085 | - * | |
1086 | - * RETURNS: | |
1087 | - * On success, pointer to the new allocation_info is returned. On | |
1088 | - * failure, ERR_PTR value is returned. | |
1089 | - */ | |
1090 | -static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |
1091 | - size_t reserved_size, size_t dyn_size, | |
1092 | - size_t atom_size, | |
1093 | - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | |
1094 | -{ | |
1095 | - static int group_map[NR_CPUS] __initdata; | |
1096 | - static int group_cnt[NR_CPUS] __initdata; | |
1097 | - const size_t static_size = __per_cpu_end - __per_cpu_start; | |
1098 | - int nr_groups = 1, nr_units = 0; | |
1099 | - size_t size_sum, min_unit_size, alloc_size; | |
1100 | - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | |
1101 | - int last_allocs, group, unit; | |
1102 | - unsigned int cpu, tcpu; | |
1103 | - struct pcpu_alloc_info *ai; | |
1104 | - unsigned int *cpu_map; | |
1105 | - | |
1106 | - /* this function may be called multiple times */ | |
1107 | - memset(group_map, 0, sizeof(group_map)); | |
1108 | - memset(group_cnt, 0, sizeof(group_cnt)); | |
1109 | - | |
1110 | - /* calculate size_sum and ensure dyn_size is enough for early alloc */ | |
1111 | - size_sum = PFN_ALIGN(static_size + reserved_size + | |
1112 | - max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE)); | |
1113 | - dyn_size = size_sum - static_size - reserved_size; | |
1114 | - | |
1115 | - /* | |
1116 | - * Determine min_unit_size, alloc_size and max_upa such that | |
1117 | - * alloc_size is multiple of atom_size and is the smallest | |
1118 | - * which can accomodate 4k aligned segments which are equal to | |
1119 | - * or larger than min_unit_size. | |
1120 | - */ | |
1121 | - min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | |
1122 | - | |
1123 | - alloc_size = roundup(min_unit_size, atom_size); | |
1124 | - upa = alloc_size / min_unit_size; | |
1125 | - while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1126 | - upa--; | |
1127 | - max_upa = upa; | |
1128 | - | |
1129 | - /* group cpus according to their proximity */ | |
1130 | - for_each_possible_cpu(cpu) { | |
1131 | - group = 0; | |
1132 | - next_group: | |
1133 | - for_each_possible_cpu(tcpu) { | |
1134 | - if (cpu == tcpu) | |
1135 | - break; | |
1136 | - if (group_map[tcpu] == group && cpu_distance_fn && | |
1137 | - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | |
1138 | - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | |
1139 | - group++; | |
1140 | - nr_groups = max(nr_groups, group + 1); | |
1141 | - goto next_group; | |
1142 | - } | |
1143 | - } | |
1144 | - group_map[cpu] = group; | |
1145 | - group_cnt[group]++; | |
1146 | - } | |
1147 | - | |
1148 | - /* | |
1149 | - * Expand unit size until address space usage goes over 75% | |
1150 | - * and then as much as possible without using more address | |
1151 | - * space. | |
1152 | - */ | |
1153 | - last_allocs = INT_MAX; | |
1154 | - for (upa = max_upa; upa; upa--) { | |
1155 | - int allocs = 0, wasted = 0; | |
1156 | - | |
1157 | - if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1158 | - continue; | |
1159 | - | |
1160 | - for (group = 0; group < nr_groups; group++) { | |
1161 | - int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | |
1162 | - allocs += this_allocs; | |
1163 | - wasted += this_allocs * upa - group_cnt[group]; | |
1164 | - } | |
1165 | - | |
1166 | - /* | |
1167 | - * Don't accept if wastage is over 1/3. The | |
1168 | - * greater-than comparison ensures upa==1 always | |
1169 | - * passes the following check. | |
1170 | - */ | |
1171 | - if (wasted > num_possible_cpus() / 3) | |
1172 | - continue; | |
1173 | - | |
1174 | - /* and then don't consume more memory */ | |
1175 | - if (allocs > last_allocs) | |
1176 | - break; | |
1177 | - last_allocs = allocs; | |
1178 | - best_upa = upa; | |
1179 | - } | |
1180 | - upa = best_upa; | |
1181 | - | |
1182 | - /* allocate and fill alloc_info */ | |
1183 | - for (group = 0; group < nr_groups; group++) | |
1184 | - nr_units += roundup(group_cnt[group], upa); | |
1185 | - | |
1186 | - ai = pcpu_alloc_alloc_info(nr_groups, nr_units); | |
1187 | - if (!ai) | |
1188 | - return ERR_PTR(-ENOMEM); | |
1189 | - cpu_map = ai->groups[0].cpu_map; | |
1190 | - | |
1191 | - for (group = 0; group < nr_groups; group++) { | |
1192 | - ai->groups[group].cpu_map = cpu_map; | |
1193 | - cpu_map += roundup(group_cnt[group], upa); | |
1194 | - } | |
1195 | - | |
1196 | - ai->static_size = static_size; | |
1197 | - ai->reserved_size = reserved_size; | |
1198 | - ai->dyn_size = dyn_size; | |
1199 | - ai->unit_size = alloc_size / upa; | |
1200 | - ai->atom_size = atom_size; | |
1201 | - ai->alloc_size = alloc_size; | |
1202 | - | |
1203 | - for (group = 0, unit = 0; group_cnt[group]; group++) { | |
1204 | - struct pcpu_group_info *gi = &ai->groups[group]; | |
1205 | - | |
1206 | - /* | |
1207 | - * Initialize base_offset as if all groups are located | |
1208 | - * back-to-back. The caller should update this to | |
1209 | - * reflect actual allocation. | |
1210 | - */ | |
1211 | - gi->base_offset = unit * ai->unit_size; | |
1212 | - | |
1213 | - for_each_possible_cpu(cpu) | |
1214 | - if (group_map[cpu] == group) | |
1215 | - gi->cpu_map[gi->nr_units++] = cpu; | |
1216 | - gi->nr_units = roundup(gi->nr_units, upa); | |
1217 | - unit += gi->nr_units; | |
1218 | - } | |
1219 | - BUG_ON(unit != nr_units); | |
1220 | - | |
1221 | - return ai; | |
1222 | -} | |
1223 | - | |
1224 | -/** | |
1225 | 1080 | * pcpu_dump_alloc_info - print out information about pcpu_alloc_info |
1226 | 1081 | * @lvl: loglevel |
1227 | 1082 | * @ai: allocation info to dump |
1228 | 1083 | |
... | ... | @@ -1363,7 +1218,9 @@ |
1363 | 1218 | |
1364 | 1219 | /* sanity checks */ |
1365 | 1220 | PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); |
1221 | +#ifdef CONFIG_SMP | |
1366 | 1222 | PCPU_SETUP_BUG_ON(!ai->static_size); |
1223 | +#endif | |
1367 | 1224 | PCPU_SETUP_BUG_ON(!base_addr); |
1368 | 1225 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); |
1369 | 1226 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); |
... | ... | @@ -1488,6 +1345,8 @@ |
1488 | 1345 | return 0; |
1489 | 1346 | } |
1490 | 1347 | |
1348 | +#ifdef CONFIG_SMP | |
1349 | + | |
1491 | 1350 | const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { |
1492 | 1351 | [PCPU_FC_AUTO] = "auto", |
1493 | 1352 | [PCPU_FC_EMBED] = "embed", |
1494 | 1353 | |
1495 | 1354 | |
... | ... | @@ -1515,9 +1374,181 @@ |
1515 | 1374 | } |
1516 | 1375 | early_param("percpu_alloc", percpu_alloc_setup); |
1517 | 1376 | |
1377 | +/* | |
1378 | + * pcpu_embed_first_chunk() is used by the generic percpu setup. | |
1379 | + * Build it if needed by the arch config or the generic setup is going | |
1380 | + * to be used. | |
1381 | + */ | |
1518 | 1382 | #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ |
1519 | 1383 | !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) |
1384 | +#define BUILD_EMBED_FIRST_CHUNK | |
1385 | +#endif | |
1386 | + | |
1387 | +/* build pcpu_page_first_chunk() iff needed by the arch config */ | |
1388 | +#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) | |
1389 | +#define BUILD_PAGE_FIRST_CHUNK | |
1390 | +#endif | |
1391 | + | |
1392 | +/* pcpu_build_alloc_info() is used by both embed and page first chunk */ | |
1393 | +#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK) | |
1520 | 1394 | /** |
1395 | + * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | |
1396 | + * @reserved_size: the size of reserved percpu area in bytes | |
1397 | + * @dyn_size: minimum free size for dynamic allocation in bytes | |
1398 | + * @atom_size: allocation atom size | |
1399 | + * @cpu_distance_fn: callback to determine distance between cpus, optional | |
1400 | + * | |
1401 | + * This function determines grouping of units, their mappings to cpus | |
1402 | + * and other parameters considering needed percpu size, allocation | |
1403 | + * atom size and distances between CPUs. | |
1404 | + * | |
1405 | + * Groups are always mutliples of atom size and CPUs which are of | |
1406 | + * LOCAL_DISTANCE both ways are grouped together and share space for | |
1407 | + * units in the same group. The returned configuration is guaranteed | |
1408 | + * to have CPUs on different nodes on different groups and >=75% usage | |
1409 | + * of allocated virtual address space. | |
1410 | + * | |
1411 | + * RETURNS: | |
1412 | + * On success, pointer to the new allocation_info is returned. On | |
1413 | + * failure, ERR_PTR value is returned. | |
1414 | + */ | |
1415 | +static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |
1416 | + size_t reserved_size, size_t dyn_size, | |
1417 | + size_t atom_size, | |
1418 | + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | |
1419 | +{ | |
1420 | + static int group_map[NR_CPUS] __initdata; | |
1421 | + static int group_cnt[NR_CPUS] __initdata; | |
1422 | + const size_t static_size = __per_cpu_end - __per_cpu_start; | |
1423 | + int nr_groups = 1, nr_units = 0; | |
1424 | + size_t size_sum, min_unit_size, alloc_size; | |
1425 | + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | |
1426 | + int last_allocs, group, unit; | |
1427 | + unsigned int cpu, tcpu; | |
1428 | + struct pcpu_alloc_info *ai; | |
1429 | + unsigned int *cpu_map; | |
1430 | + | |
1431 | + /* this function may be called multiple times */ | |
1432 | + memset(group_map, 0, sizeof(group_map)); | |
1433 | + memset(group_cnt, 0, sizeof(group_cnt)); | |
1434 | + | |
1435 | + /* calculate size_sum and ensure dyn_size is enough for early alloc */ | |
1436 | + size_sum = PFN_ALIGN(static_size + reserved_size + | |
1437 | + max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE)); | |
1438 | + dyn_size = size_sum - static_size - reserved_size; | |
1439 | + | |
1440 | + /* | |
1441 | + * Determine min_unit_size, alloc_size and max_upa such that | |
1442 | + * alloc_size is multiple of atom_size and is the smallest | |
1443 | + * which can accomodate 4k aligned segments which are equal to | |
1444 | + * or larger than min_unit_size. | |
1445 | + */ | |
1446 | + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | |
1447 | + | |
1448 | + alloc_size = roundup(min_unit_size, atom_size); | |
1449 | + upa = alloc_size / min_unit_size; | |
1450 | + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1451 | + upa--; | |
1452 | + max_upa = upa; | |
1453 | + | |
1454 | + /* group cpus according to their proximity */ | |
1455 | + for_each_possible_cpu(cpu) { | |
1456 | + group = 0; | |
1457 | + next_group: | |
1458 | + for_each_possible_cpu(tcpu) { | |
1459 | + if (cpu == tcpu) | |
1460 | + break; | |
1461 | + if (group_map[tcpu] == group && cpu_distance_fn && | |
1462 | + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | |
1463 | + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | |
1464 | + group++; | |
1465 | + nr_groups = max(nr_groups, group + 1); | |
1466 | + goto next_group; | |
1467 | + } | |
1468 | + } | |
1469 | + group_map[cpu] = group; | |
1470 | + group_cnt[group]++; | |
1471 | + } | |
1472 | + | |
1473 | + /* | |
1474 | + * Expand unit size until address space usage goes over 75% | |
1475 | + * and then as much as possible without using more address | |
1476 | + * space. | |
1477 | + */ | |
1478 | + last_allocs = INT_MAX; | |
1479 | + for (upa = max_upa; upa; upa--) { | |
1480 | + int allocs = 0, wasted = 0; | |
1481 | + | |
1482 | + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1483 | + continue; | |
1484 | + | |
1485 | + for (group = 0; group < nr_groups; group++) { | |
1486 | + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | |
1487 | + allocs += this_allocs; | |
1488 | + wasted += this_allocs * upa - group_cnt[group]; | |
1489 | + } | |
1490 | + | |
1491 | + /* | |
1492 | + * Don't accept if wastage is over 1/3. The | |
1493 | + * greater-than comparison ensures upa==1 always | |
1494 | + * passes the following check. | |
1495 | + */ | |
1496 | + if (wasted > num_possible_cpus() / 3) | |
1497 | + continue; | |
1498 | + | |
1499 | + /* and then don't consume more memory */ | |
1500 | + if (allocs > last_allocs) | |
1501 | + break; | |
1502 | + last_allocs = allocs; | |
1503 | + best_upa = upa; | |
1504 | + } | |
1505 | + upa = best_upa; | |
1506 | + | |
1507 | + /* allocate and fill alloc_info */ | |
1508 | + for (group = 0; group < nr_groups; group++) | |
1509 | + nr_units += roundup(group_cnt[group], upa); | |
1510 | + | |
1511 | + ai = pcpu_alloc_alloc_info(nr_groups, nr_units); | |
1512 | + if (!ai) | |
1513 | + return ERR_PTR(-ENOMEM); | |
1514 | + cpu_map = ai->groups[0].cpu_map; | |
1515 | + | |
1516 | + for (group = 0; group < nr_groups; group++) { | |
1517 | + ai->groups[group].cpu_map = cpu_map; | |
1518 | + cpu_map += roundup(group_cnt[group], upa); | |
1519 | + } | |
1520 | + | |
1521 | + ai->static_size = static_size; | |
1522 | + ai->reserved_size = reserved_size; | |
1523 | + ai->dyn_size = dyn_size; | |
1524 | + ai->unit_size = alloc_size / upa; | |
1525 | + ai->atom_size = atom_size; | |
1526 | + ai->alloc_size = alloc_size; | |
1527 | + | |
1528 | + for (group = 0, unit = 0; group_cnt[group]; group++) { | |
1529 | + struct pcpu_group_info *gi = &ai->groups[group]; | |
1530 | + | |
1531 | + /* | |
1532 | + * Initialize base_offset as if all groups are located | |
1533 | + * back-to-back. The caller should update this to | |
1534 | + * reflect actual allocation. | |
1535 | + */ | |
1536 | + gi->base_offset = unit * ai->unit_size; | |
1537 | + | |
1538 | + for_each_possible_cpu(cpu) | |
1539 | + if (group_map[cpu] == group) | |
1540 | + gi->cpu_map[gi->nr_units++] = cpu; | |
1541 | + gi->nr_units = roundup(gi->nr_units, upa); | |
1542 | + unit += gi->nr_units; | |
1543 | + } | |
1544 | + BUG_ON(unit != nr_units); | |
1545 | + | |
1546 | + return ai; | |
1547 | +} | |
1548 | +#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */ | |
1549 | + | |
1550 | +#if defined(BUILD_EMBED_FIRST_CHUNK) | |
1551 | +/** | |
1521 | 1552 | * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem |
1522 | 1553 | * @reserved_size: the size of reserved percpu area in bytes |
1523 | 1554 | * @dyn_size: minimum free size for dynamic allocation in bytes |
1524 | 1555 | |
... | ... | @@ -1645,10 +1676,9 @@ |
1645 | 1676 | free_bootmem(__pa(areas), areas_size); |
1646 | 1677 | return rc; |
1647 | 1678 | } |
1648 | -#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || | |
1649 | - !CONFIG_HAVE_SETUP_PER_CPU_AREA */ | |
1679 | +#endif /* BUILD_EMBED_FIRST_CHUNK */ | |
1650 | 1680 | |
1651 | -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | |
1681 | +#ifdef BUILD_PAGE_FIRST_CHUNK | |
1652 | 1682 | /** |
1653 | 1683 | * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages |
1654 | 1684 | * @reserved_size: the size of reserved percpu area in bytes |
1655 | 1685 | |
1656 | 1686 | |
... | ... | @@ -1756,10 +1786,11 @@ |
1756 | 1786 | pcpu_free_alloc_info(ai); |
1757 | 1787 | return rc; |
1758 | 1788 | } |
1759 | -#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | |
1789 | +#endif /* BUILD_PAGE_FIRST_CHUNK */ | |
1760 | 1790 | |
1791 | +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | |
1761 | 1792 | /* |
1762 | - * Generic percpu area setup. | |
1793 | + * Generic SMP percpu area setup. | |
1763 | 1794 | * |
1764 | 1795 | * The embedding helper is used because its behavior closely resembles |
1765 | 1796 | * the original non-dynamic generic percpu area setup. This is |
... | ... | @@ -1770,7 +1801,6 @@ |
1770 | 1801 | * on the physical linear memory mapping which uses large page |
1771 | 1802 | * mappings on applicable archs. |
1772 | 1803 | */ |
1773 | -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | |
1774 | 1804 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; |
1775 | 1805 | EXPORT_SYMBOL(__per_cpu_offset); |
1776 | 1806 | |
1777 | 1807 | |
... | ... | @@ -1799,13 +1829,48 @@ |
1799 | 1829 | PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, |
1800 | 1830 | pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); |
1801 | 1831 | if (rc < 0) |
1802 | - panic("Failed to initialized percpu areas."); | |
1832 | + panic("Failed to initialize percpu areas."); | |
1803 | 1833 | |
1804 | 1834 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
1805 | 1835 | for_each_possible_cpu(cpu) |
1806 | 1836 | __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; |
1807 | 1837 | } |
1808 | -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | |
1838 | +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | |
1839 | + | |
1840 | +#else /* CONFIG_SMP */ | |
1841 | + | |
1842 | +/* | |
1843 | + * UP percpu area setup. | |
1844 | + * | |
1845 | + * UP always uses km-based percpu allocator with identity mapping. | |
1846 | + * Static percpu variables are indistinguishable from the usual static | |
1847 | + * variables and don't require any special preparation. | |
1848 | + */ | |
1849 | +void __init setup_per_cpu_areas(void) | |
1850 | +{ | |
1851 | + const size_t unit_size = | |
1852 | + roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, | |
1853 | + PERCPU_DYNAMIC_RESERVE)); | |
1854 | + struct pcpu_alloc_info *ai; | |
1855 | + void *fc; | |
1856 | + | |
1857 | + ai = pcpu_alloc_alloc_info(1, 1); | |
1858 | + fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | |
1859 | + if (!ai || !fc) | |
1860 | + panic("Failed to allocate memory for percpu areas."); | |
1861 | + | |
1862 | + ai->dyn_size = unit_size; | |
1863 | + ai->unit_size = unit_size; | |
1864 | + ai->atom_size = unit_size; | |
1865 | + ai->alloc_size = unit_size; | |
1866 | + ai->groups[0].nr_units = 1; | |
1867 | + ai->groups[0].cpu_map[0] = 0; | |
1868 | + | |
1869 | + if (pcpu_setup_first_chunk(ai, fc) < 0) | |
1870 | + panic("Failed to initialize percpu areas."); | |
1871 | +} | |
1872 | + | |
1873 | +#endif /* CONFIG_SMP */ | |
1809 | 1874 | |
1810 | 1875 | /* |
1811 | 1876 | * First and reserved chunks are initialized with temporary allocation |
mm/percpu_up.c
1 | -/* | |
2 | - * mm/percpu_up.c - dummy percpu memory allocator implementation for UP | |
3 | - */ | |
4 | - | |
5 | -#include <linux/module.h> | |
6 | -#include <linux/percpu.h> | |
7 | -#include <linux/slab.h> | |
8 | - | |
9 | -void __percpu *__alloc_percpu(size_t size, size_t align) | |
10 | -{ | |
11 | - /* | |
12 | - * Can't easily make larger alignment work with kmalloc. WARN | |
13 | - * on it. Larger alignment should only be used for module | |
14 | - * percpu sections on SMP for which this path isn't used. | |
15 | - */ | |
16 | - WARN_ON_ONCE(align > SMP_CACHE_BYTES); | |
17 | - return (void __percpu __force *)kzalloc(size, GFP_KERNEL); | |
18 | -} | |
19 | -EXPORT_SYMBOL_GPL(__alloc_percpu); | |
20 | - | |
21 | -void free_percpu(void __percpu *p) | |
22 | -{ | |
23 | - kfree(this_cpu_ptr(p)); | |
24 | -} | |
25 | -EXPORT_SYMBOL_GPL(free_percpu); | |
26 | - | |
27 | -phys_addr_t per_cpu_ptr_to_phys(void *addr) | |
28 | -{ | |
29 | - return __pa(addr); | |
30 | -} |
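Removing this UP shim is more than cleanup: the kzalloc-based implementation warned on alignments larger than SMP_CACHE_BYTES, while the real allocator that now also serves UP honours any alignment up to PAGE_SIZE (the documented maximum). A hypothetical caller that benefits is sketched below; my_buf and my_buf_init are made-up names.

#include <linux/percpu.h>

/* Page-aligned dynamic percpu memory now behaves the same on UP and SMP. */
static void __percpu *my_buf;

static int my_buf_init(void)
{
	my_buf = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
	return my_buf ? 0 : -ENOMEM;
}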
mm/vmalloc.c
... | ... | @@ -2065,6 +2065,7 @@ |
2065 | 2065 | } |
2066 | 2066 | EXPORT_SYMBOL_GPL(free_vm_area); |
2067 | 2067 | |
2068 | +#ifdef CONFIG_SMP | |
2068 | 2069 | static struct vmap_area *node_to_va(struct rb_node *n) |
2069 | 2070 | { |
2070 | 2071 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; |
... | ... | @@ -2345,6 +2346,7 @@ |
2345 | 2346 | free_vm_area(vms[i]); |
2346 | 2347 | kfree(vms); |
2347 | 2348 | } |
2349 | +#endif /* CONFIG_SMP */ | |
2348 | 2350 | |
2349 | 2351 | #ifdef CONFIG_PROC_FS |
2350 | 2352 | static void *s_start(struct seq_file *m, loff_t *pos) |