Commit 0fc0531e0a2174377a86fd6953ecaa00287d8f70
Exists in master and in 7 other branches
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: update comments to reflect that percpu allocations are always zero-filled
  percpu: Optimize __get_cpu_var()
  x86, percpu: Optimize this_cpu_ptr
  percpu: clear memory allocated with the km allocator
  percpu: fix build breakage on s390 and cleanup build configuration tests
  percpu: use percpu allocator on UP too
  percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
  vmalloc: pcpu_get/free_vm_areas() aren't needed on UP

Fixed up trivial conflicts in include/linux/percpu.h
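Most of the merged patches rework the percpu accessor macros and the UP allocator. For orientation, here is a minimal usage sketch of the interface these patches optimize; it is not part of the commit, and my_hits, record_hit and total_hits are made-up names.

#include <linux/percpu.h>
#include <linux/cpumask.h>

/* One instance of the counter per possible CPU. */
static DEFINE_PER_CPU(unsigned long, my_hits);

static void record_hit(void)
{
	/*
	 * this_cpu_ptr() resolves to the current CPU's copy; the x86 and
	 * asm-generic changes below make this expansion cheaper.  Callers
	 * are expected to have preemption disabled here.
	 */
	(*this_cpu_ptr(&my_hits))++;
}

static unsigned long total_hits(void)
{
	unsigned long sum = 0;
	int cpu;

	/* per_cpu() addresses a specific CPU's copy. */
	for_each_possible_cpu(cpu)
		sum += per_cpu(my_hits, cpu);
	return sum;
}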
Showing 10 changed files
arch/x86/include/asm/percpu.h
... | ... | @@ -47,6 +47,20 @@ |
47 | 47 | #ifdef CONFIG_SMP |
48 | 48 | #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x |
49 | 49 | #define __my_cpu_offset percpu_read(this_cpu_off) |
50 | + | |
51 | +/* | |
52 | + * Compared to the generic __my_cpu_offset version, the following | |
53 | + * saves one instruction and avoids clobbering a temp register. | |
54 | + */ | |
55 | +#define __this_cpu_ptr(ptr) \ | |
56 | +({ \ | |
57 | + unsigned long tcp_ptr__; \ | |
58 | + __verify_pcpu_ptr(ptr); \ | |
59 | + asm volatile("add " __percpu_arg(1) ", %0" \ | |
60 | + : "=r" (tcp_ptr__) \ | |
61 | + : "m" (this_cpu_off), "0" (ptr)); \ | |
62 | + (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ | |
63 | +}) | |
50 | 64 | #else |
51 | 65 | #define __percpu_arg(x) "%P" #x |
52 | 66 | #endif |
include/asm-generic/percpu.h
... | ... | @@ -55,14 +55,18 @@ |
55 | 55 | */ |
56 | 56 | #define per_cpu(var, cpu) \ |
57 | 57 | (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) |
58 | -#define __get_cpu_var(var) \ | |
59 | - (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) | |
60 | -#define __raw_get_cpu_var(var) \ | |
61 | - (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) | |
62 | 58 | |
63 | -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) | |
59 | +#ifndef __this_cpu_ptr | |
64 | 60 | #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) |
61 | +#endif | |
62 | +#ifdef CONFIG_DEBUG_PREEMPT | |
63 | +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) | |
64 | +#else | |
65 | +#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) | |
66 | +#endif | |
65 | 67 | |
68 | +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) | |
69 | +#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) | |
66 | 70 | |
67 | 71 | #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA |
68 | 72 | extern void setup_per_cpu_areas(void); |
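After this rework __get_cpu_var() is defined directly on top of this_cpu_ptr(), and the preemption-checked my_cpu_offset path is kept only under CONFIG_DEBUG_PREEMPT, so an arch that supplies an optimized __this_cpu_ptr (as x86 now does above) speeds up every __get_cpu_var() user for free. A small equivalence sketch follows; my_state and touch_state are made-up names, not from the commit.

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, my_state);

static void touch_state(void)
{
	/* With this patch the two statements below generate the same code. */
	__get_cpu_var(my_state) = 1;		/* old-style accessor ...          */
	*this_cpu_ptr(&my_state) = 1;		/* ... and what it now expands to */
}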
include/linux/percpu.h
... | ... | @@ -48,10 +48,8 @@ |
48 | 48 | preempt_enable(); \ |
49 | 49 | } while (0) |
50 | 50 | |
51 | -#ifdef CONFIG_SMP | |
52 | - | |
53 | 51 | /* minimum unit size, also is the maximum supported allocation size */ |
54 | -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) | |
52 | +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
55 | 53 | |
56 | 54 | /* |
57 | 55 | * Percpu allocator can serve percpu allocations before slab is |
58 | 56 | |
59 | 57 | |
60 | 58 | |
... | ... | @@ -146,36 +144,19 @@ |
146 | 144 | * dynamically allocated. Non-atomic access to the current CPU's |
147 | 145 | * version should probably be combined with get_cpu()/put_cpu(). |
148 | 146 | */ |
147 | +#ifdef CONFIG_SMP | |
149 | 148 | #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) |
149 | +#else | |
150 | +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) | |
151 | +#endif | |
150 | 152 | |
151 | 153 | extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); |
152 | 154 | extern bool is_kernel_percpu_address(unsigned long addr); |
153 | 155 | |
154 | -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | |
156 | +#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) | |
155 | 157 | extern void __init setup_per_cpu_areas(void); |
156 | 158 | #endif |
157 | 159 | extern void __init percpu_init_late(void); |
158 | - | |
159 | -#else /* CONFIG_SMP */ | |
160 | - | |
161 | -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) | |
162 | - | |
163 | -/* can't distinguish from other static vars, always false */ | |
164 | -static inline bool is_kernel_percpu_address(unsigned long addr) | |
165 | -{ | |
166 | - return false; | |
167 | -} | |
168 | - | |
169 | -static inline void __init setup_per_cpu_areas(void) { } | |
170 | - | |
171 | -static inline void __init percpu_init_late(void) { } | |
172 | - | |
173 | -static inline void *pcpu_lpage_remapped(void *kaddr) | |
174 | -{ | |
175 | - return NULL; | |
176 | -} | |
177 | - | |
178 | -#endif /* CONFIG_SMP */ | |
179 | 160 | |
180 | 161 | extern void __percpu *__alloc_percpu(size_t size, size_t align); |
181 | 162 | extern void free_percpu(void __percpu *__pdata); |
include/linux/vmalloc.h
... | ... | @@ -117,11 +117,13 @@ |
117 | 117 | extern struct vm_struct *vmlist; |
118 | 118 | extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); |
119 | 119 | |
120 | +#ifdef CONFIG_SMP | |
120 | 121 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, |
121 | 122 | const size_t *sizes, int nr_vms, |
122 | 123 | size_t align, gfp_t gfp_mask); |
123 | 124 | |
124 | 125 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); |
126 | +#endif | |
125 | 127 | |
126 | 128 | #endif /* _LINUX_VMALLOC_H */ |
mm/Kconfig
... | ... | @@ -301,4 +301,12 @@ |
301 | 301 | of 1 says that all excess pages should be trimmed. |
302 | 302 | |
303 | 303 | See Documentation/nommu-mmap.txt for more information. |
304 | + | |
305 | +# | |
306 | +# UP and nommu archs use km based percpu allocator | |
307 | +# | |
308 | +config NEED_PER_CPU_KM | |
309 | + depends on !SMP | |
310 | + bool | |
311 | + default y |
mm/Makefile
... | ... | @@ -11,7 +11,7 @@ |
11 | 11 | maccess.o page_alloc.o page-writeback.o \ |
12 | 12 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
13 | 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
14 | - page_isolation.o mm_init.o mmu_context.o \ | |
14 | + page_isolation.o mm_init.o mmu_context.o percpu.o \ | |
15 | 15 | $(mmu-y) |
16 | 16 | obj-y += init-mm.o |
17 | 17 | |
... | ... | @@ -36,11 +36,6 @@ |
36 | 36 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
37 | 37 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
38 | 38 | obj-$(CONFIG_MIGRATION) += migrate.o |
39 | -ifdef CONFIG_SMP | |
40 | -obj-y += percpu.o | |
41 | -else | |
42 | -obj-y += percpu_up.o | |
43 | -endif | |
44 | 39 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
45 | 40 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o |
46 | 41 | obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o |
mm/percpu-km.c
... | ... | @@ -27,7 +27,7 @@ |
27 | 27 | * chunk size is not aligned. percpu-km code will whine about it. |
28 | 28 | */ |
29 | 29 | |
30 | -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | |
30 | +#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) | |
31 | 31 | #error "contiguous percpu allocation is incompatible with paged first chunk" |
32 | 32 | #endif |
33 | 33 | |
... | ... | @@ -35,7 +35,11 @@ |
35 | 35 | |
36 | 36 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) |
37 | 37 | { |
38 | - /* noop */ | |
38 | + unsigned int cpu; | |
39 | + | |
40 | + for_each_possible_cpu(cpu) | |
41 | + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | |
42 | + | |
39 | 43 | return 0; |
40 | 44 | } |
41 | 45 |
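pcpu_populate_chunk() in the km backend used to be a no-op, so space reused within an already allocated chunk could be handed out with stale contents. Clearing the populated region for every possible CPU restores the zero-fill guarantee that the updated kerneldoc comments in this merge document. A hypothetical caller that relies on that guarantee is sketched below; my_stats, stats and my_stats_init are made-up names.

#include <linux/percpu.h>

struct my_stats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
};

static struct my_stats __percpu *stats;

static int my_stats_init(void)
{
	stats = alloc_percpu(struct my_stats);
	if (!stats)
		return -ENOMEM;
	/* No memset here: every CPU's counters are relied on to start at 0. */
	return 0;
}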
mm/percpu.c
... | ... | @@ -76,6 +76,7 @@ |
76 | 76 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
77 | 77 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
78 | 78 | |
79 | +#ifdef CONFIG_SMP | |
79 | 80 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ |
80 | 81 | #ifndef __addr_to_pcpu_ptr |
81 | 82 | #define __addr_to_pcpu_ptr(addr) \ |
... | ... | @@ -89,6 +90,11 @@ |
89 | 90 | (unsigned long)pcpu_base_addr - \ |
90 | 91 | (unsigned long)__per_cpu_start) |
91 | 92 | #endif |
93 | +#else /* CONFIG_SMP */ | |
94 | +/* on UP, it's always identity mapped */ | |
95 | +#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) | |
96 | +#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) | |
97 | +#endif /* CONFIG_SMP */ | |
92 | 98 | |
93 | 99 | struct pcpu_chunk { |
94 | 100 | struct list_head list; /* linked to pcpu_slot lists */ |
... | ... | @@ -820,8 +826,8 @@ |
820 | 826 | * @size: size of area to allocate in bytes |
821 | 827 | * @align: alignment of area (max PAGE_SIZE) |
822 | 828 | * |
823 | - * Allocate percpu area of @size bytes aligned at @align. Might | |
824 | - * sleep. Might trigger writeouts. | |
829 | + * Allocate zero-filled percpu area of @size bytes aligned at @align. | |
830 | + * Might sleep. Might trigger writeouts. | |
825 | 831 | * |
826 | 832 | * CONTEXT: |
827 | 833 | * Does GFP_KERNEL allocation. |
... | ... | @@ -840,9 +846,10 @@ |
840 | 846 | * @size: size of area to allocate in bytes |
841 | 847 | * @align: alignment of area (max PAGE_SIZE) |
842 | 848 | * |
843 | - * Allocate percpu area of @size bytes aligned at @align from reserved | |
844 | - * percpu area if arch has set it up; otherwise, allocation is served | |
845 | - * from the same dynamic area. Might sleep. Might trigger writeouts. | |
849 | + * Allocate zero-filled percpu area of @size bytes aligned at @align | |
850 | + * from reserved percpu area if arch has set it up; otherwise, | |
851 | + * allocation is served from the same dynamic area. Might sleep. | |
852 | + * Might trigger writeouts. | |
846 | 853 | * |
847 | 854 | * CONTEXT: |
848 | 855 | * Does GFP_KERNEL allocation. |
... | ... | @@ -949,6 +956,7 @@ |
949 | 956 | */ |
950 | 957 | bool is_kernel_percpu_address(unsigned long addr) |
951 | 958 | { |
959 | +#ifdef CONFIG_SMP | |
952 | 960 | const size_t static_size = __per_cpu_end - __per_cpu_start; |
953 | 961 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); |
954 | 962 | unsigned int cpu; |
... | ... | @@ -959,6 +967,8 @@ |
959 | 967 | if ((void *)addr >= start && (void *)addr < start + static_size) |
960 | 968 | return true; |
961 | 969 | } |
970 | +#endif | |
971 | + /* on UP, can't distinguish from other static vars, always false */ | |
962 | 972 | return false; |
963 | 973 | } |
964 | 974 | |
... | ... | @@ -1067,161 +1077,6 @@ |
1067 | 1077 | } |
1068 | 1078 | |
1069 | 1079 | /** |
1070 | - * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | |
1071 | - * @reserved_size: the size of reserved percpu area in bytes | |
1072 | - * @dyn_size: minimum free size for dynamic allocation in bytes | |
1073 | - * @atom_size: allocation atom size | |
1074 | - * @cpu_distance_fn: callback to determine distance between cpus, optional | |
1075 | - * | |
1076 | - * This function determines grouping of units, their mappings to cpus | |
1077 | - * and other parameters considering needed percpu size, allocation | |
1078 | - * atom size and distances between CPUs. | |
1079 | - * | |
1080 | - * Groups are always mutliples of atom size and CPUs which are of | |
1081 | - * LOCAL_DISTANCE both ways are grouped together and share space for | |
1082 | - * units in the same group. The returned configuration is guaranteed | |
1083 | - * to have CPUs on different nodes on different groups and >=75% usage | |
1084 | - * of allocated virtual address space. | |
1085 | - * | |
1086 | - * RETURNS: | |
1087 | - * On success, pointer to the new allocation_info is returned. On | |
1088 | - * failure, ERR_PTR value is returned. | |
1089 | - */ | |
1090 | -static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |
1091 | - size_t reserved_size, size_t dyn_size, | |
1092 | - size_t atom_size, | |
1093 | - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | |
1094 | -{ | |
1095 | - static int group_map[NR_CPUS] __initdata; | |
1096 | - static int group_cnt[NR_CPUS] __initdata; | |
1097 | - const size_t static_size = __per_cpu_end - __per_cpu_start; | |
1098 | - int nr_groups = 1, nr_units = 0; | |
1099 | - size_t size_sum, min_unit_size, alloc_size; | |
1100 | - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | |
1101 | - int last_allocs, group, unit; | |
1102 | - unsigned int cpu, tcpu; | |
1103 | - struct pcpu_alloc_info *ai; | |
1104 | - unsigned int *cpu_map; | |
1105 | - | |
1106 | - /* this function may be called multiple times */ | |
1107 | - memset(group_map, 0, sizeof(group_map)); | |
1108 | - memset(group_cnt, 0, sizeof(group_cnt)); | |
1109 | - | |
1110 | - /* calculate size_sum and ensure dyn_size is enough for early alloc */ | |
1111 | - size_sum = PFN_ALIGN(static_size + reserved_size + | |
1112 | - max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE)); | |
1113 | - dyn_size = size_sum - static_size - reserved_size; | |
1114 | - | |
1115 | - /* | |
1116 | - * Determine min_unit_size, alloc_size and max_upa such that | |
1117 | - * alloc_size is multiple of atom_size and is the smallest | |
1118 | - * which can accomodate 4k aligned segments which are equal to | |
1119 | - * or larger than min_unit_size. | |
1120 | - */ | |
1121 | - min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | |
1122 | - | |
1123 | - alloc_size = roundup(min_unit_size, atom_size); | |
1124 | - upa = alloc_size / min_unit_size; | |
1125 | - while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1126 | - upa--; | |
1127 | - max_upa = upa; | |
1128 | - | |
1129 | - /* group cpus according to their proximity */ | |
1130 | - for_each_possible_cpu(cpu) { | |
1131 | - group = 0; | |
1132 | - next_group: | |
1133 | - for_each_possible_cpu(tcpu) { | |
1134 | - if (cpu == tcpu) | |
1135 | - break; | |
1136 | - if (group_map[tcpu] == group && cpu_distance_fn && | |
1137 | - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | |
1138 | - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | |
1139 | - group++; | |
1140 | - nr_groups = max(nr_groups, group + 1); | |
1141 | - goto next_group; | |
1142 | - } | |
1143 | - } | |
1144 | - group_map[cpu] = group; | |
1145 | - group_cnt[group]++; | |
1146 | - } | |
1147 | - | |
1148 | - /* | |
1149 | - * Expand unit size until address space usage goes over 75% | |
1150 | - * and then as much as possible without using more address | |
1151 | - * space. | |
1152 | - */ | |
1153 | - last_allocs = INT_MAX; | |
1154 | - for (upa = max_upa; upa; upa--) { | |
1155 | - int allocs = 0, wasted = 0; | |
1156 | - | |
1157 | - if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1158 | - continue; | |
1159 | - | |
1160 | - for (group = 0; group < nr_groups; group++) { | |
1161 | - int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | |
1162 | - allocs += this_allocs; | |
1163 | - wasted += this_allocs * upa - group_cnt[group]; | |
1164 | - } | |
1165 | - | |
1166 | - /* | |
1167 | - * Don't accept if wastage is over 1/3. The | |
1168 | - * greater-than comparison ensures upa==1 always | |
1169 | - * passes the following check. | |
1170 | - */ | |
1171 | - if (wasted > num_possible_cpus() / 3) | |
1172 | - continue; | |
1173 | - | |
1174 | - /* and then don't consume more memory */ | |
1175 | - if (allocs > last_allocs) | |
1176 | - break; | |
1177 | - last_allocs = allocs; | |
1178 | - best_upa = upa; | |
1179 | - } | |
1180 | - upa = best_upa; | |
1181 | - | |
1182 | - /* allocate and fill alloc_info */ | |
1183 | - for (group = 0; group < nr_groups; group++) | |
1184 | - nr_units += roundup(group_cnt[group], upa); | |
1185 | - | |
1186 | - ai = pcpu_alloc_alloc_info(nr_groups, nr_units); | |
1187 | - if (!ai) | |
1188 | - return ERR_PTR(-ENOMEM); | |
1189 | - cpu_map = ai->groups[0].cpu_map; | |
1190 | - | |
1191 | - for (group = 0; group < nr_groups; group++) { | |
1192 | - ai->groups[group].cpu_map = cpu_map; | |
1193 | - cpu_map += roundup(group_cnt[group], upa); | |
1194 | - } | |
1195 | - | |
1196 | - ai->static_size = static_size; | |
1197 | - ai->reserved_size = reserved_size; | |
1198 | - ai->dyn_size = dyn_size; | |
1199 | - ai->unit_size = alloc_size / upa; | |
1200 | - ai->atom_size = atom_size; | |
1201 | - ai->alloc_size = alloc_size; | |
1202 | - | |
1203 | - for (group = 0, unit = 0; group_cnt[group]; group++) { | |
1204 | - struct pcpu_group_info *gi = &ai->groups[group]; | |
1205 | - | |
1206 | - /* | |
1207 | - * Initialize base_offset as if all groups are located | |
1208 | - * back-to-back. The caller should update this to | |
1209 | - * reflect actual allocation. | |
1210 | - */ | |
1211 | - gi->base_offset = unit * ai->unit_size; | |
1212 | - | |
1213 | - for_each_possible_cpu(cpu) | |
1214 | - if (group_map[cpu] == group) | |
1215 | - gi->cpu_map[gi->nr_units++] = cpu; | |
1216 | - gi->nr_units = roundup(gi->nr_units, upa); | |
1217 | - unit += gi->nr_units; | |
1218 | - } | |
1219 | - BUG_ON(unit != nr_units); | |
1220 | - | |
1221 | - return ai; | |
1222 | -} | |
1223 | - | |
1224 | -/** | |
1225 | 1080 | * pcpu_dump_alloc_info - print out information about pcpu_alloc_info |
1226 | 1081 | * @lvl: loglevel |
1227 | 1082 | * @ai: allocation info to dump |
1228 | 1083 | |
... | ... | @@ -1363,7 +1218,9 @@ |
1363 | 1218 | |
1364 | 1219 | /* sanity checks */ |
1365 | 1220 | PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); |
1221 | +#ifdef CONFIG_SMP | |
1366 | 1222 | PCPU_SETUP_BUG_ON(!ai->static_size); |
1223 | +#endif | |
1367 | 1224 | PCPU_SETUP_BUG_ON(!base_addr); |
1368 | 1225 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); |
1369 | 1226 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); |
... | ... | @@ -1488,6 +1345,8 @@ |
1488 | 1345 | return 0; |
1489 | 1346 | } |
1490 | 1347 | |
1348 | +#ifdef CONFIG_SMP | |
1349 | + | |
1491 | 1350 | const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { |
1492 | 1351 | [PCPU_FC_AUTO] = "auto", |
1493 | 1352 | [PCPU_FC_EMBED] = "embed", |
1494 | 1353 | |
1495 | 1354 | |
... | ... | @@ -1515,9 +1374,181 @@ |
1515 | 1374 | } |
1516 | 1375 | early_param("percpu_alloc", percpu_alloc_setup); |
1517 | 1376 | |
1377 | +/* | |
1378 | + * pcpu_embed_first_chunk() is used by the generic percpu setup. | |
1379 | + * Build it if needed by the arch config or the generic setup is going | |
1380 | + * to be used. | |
1381 | + */ | |
1518 | 1382 | #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ |
1519 | 1383 | !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) |
1384 | +#define BUILD_EMBED_FIRST_CHUNK | |
1385 | +#endif | |
1386 | + | |
1387 | +/* build pcpu_page_first_chunk() iff needed by the arch config */ | |
1388 | +#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) | |
1389 | +#define BUILD_PAGE_FIRST_CHUNK | |
1390 | +#endif | |
1391 | + | |
1392 | +/* pcpu_build_alloc_info() is used by both embed and page first chunk */ | |
1393 | +#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK) | |
1520 | 1394 | /** |
1395 | + * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | |
1396 | + * @reserved_size: the size of reserved percpu area in bytes | |
1397 | + * @dyn_size: minimum free size for dynamic allocation in bytes | |
1398 | + * @atom_size: allocation atom size | |
1399 | + * @cpu_distance_fn: callback to determine distance between cpus, optional | |
1400 | + * | |
1401 | + * This function determines grouping of units, their mappings to cpus | |
1402 | + * and other parameters considering needed percpu size, allocation | |
1403 | + * atom size and distances between CPUs. | |
1404 | + * | |
1405 | + * Groups are always mutliples of atom size and CPUs which are of | |
1406 | + * LOCAL_DISTANCE both ways are grouped together and share space for | |
1407 | + * units in the same group. The returned configuration is guaranteed | |
1408 | + * to have CPUs on different nodes on different groups and >=75% usage | |
1409 | + * of allocated virtual address space. | |
1410 | + * | |
1411 | + * RETURNS: | |
1412 | + * On success, pointer to the new allocation_info is returned. On | |
1413 | + * failure, ERR_PTR value is returned. | |
1414 | + */ | |
1415 | +static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |
1416 | + size_t reserved_size, size_t dyn_size, | |
1417 | + size_t atom_size, | |
1418 | + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) | |
1419 | +{ | |
1420 | + static int group_map[NR_CPUS] __initdata; | |
1421 | + static int group_cnt[NR_CPUS] __initdata; | |
1422 | + const size_t static_size = __per_cpu_end - __per_cpu_start; | |
1423 | + int nr_groups = 1, nr_units = 0; | |
1424 | + size_t size_sum, min_unit_size, alloc_size; | |
1425 | + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ | |
1426 | + int last_allocs, group, unit; | |
1427 | + unsigned int cpu, tcpu; | |
1428 | + struct pcpu_alloc_info *ai; | |
1429 | + unsigned int *cpu_map; | |
1430 | + | |
1431 | + /* this function may be called multiple times */ | |
1432 | + memset(group_map, 0, sizeof(group_map)); | |
1433 | + memset(group_cnt, 0, sizeof(group_cnt)); | |
1434 | + | |
1435 | + /* calculate size_sum and ensure dyn_size is enough for early alloc */ | |
1436 | + size_sum = PFN_ALIGN(static_size + reserved_size + | |
1437 | + max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE)); | |
1438 | + dyn_size = size_sum - static_size - reserved_size; | |
1439 | + | |
1440 | + /* | |
1441 | + * Determine min_unit_size, alloc_size and max_upa such that | |
1442 | + * alloc_size is multiple of atom_size and is the smallest | |
1443 | + * which can accomodate 4k aligned segments which are equal to | |
1444 | + * or larger than min_unit_size. | |
1445 | + */ | |
1446 | + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); | |
1447 | + | |
1448 | + alloc_size = roundup(min_unit_size, atom_size); | |
1449 | + upa = alloc_size / min_unit_size; | |
1450 | + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1451 | + upa--; | |
1452 | + max_upa = upa; | |
1453 | + | |
1454 | + /* group cpus according to their proximity */ | |
1455 | + for_each_possible_cpu(cpu) { | |
1456 | + group = 0; | |
1457 | + next_group: | |
1458 | + for_each_possible_cpu(tcpu) { | |
1459 | + if (cpu == tcpu) | |
1460 | + break; | |
1461 | + if (group_map[tcpu] == group && cpu_distance_fn && | |
1462 | + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || | |
1463 | + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { | |
1464 | + group++; | |
1465 | + nr_groups = max(nr_groups, group + 1); | |
1466 | + goto next_group; | |
1467 | + } | |
1468 | + } | |
1469 | + group_map[cpu] = group; | |
1470 | + group_cnt[group]++; | |
1471 | + } | |
1472 | + | |
1473 | + /* | |
1474 | + * Expand unit size until address space usage goes over 75% | |
1475 | + * and then as much as possible without using more address | |
1476 | + * space. | |
1477 | + */ | |
1478 | + last_allocs = INT_MAX; | |
1479 | + for (upa = max_upa; upa; upa--) { | |
1480 | + int allocs = 0, wasted = 0; | |
1481 | + | |
1482 | + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) | |
1483 | + continue; | |
1484 | + | |
1485 | + for (group = 0; group < nr_groups; group++) { | |
1486 | + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); | |
1487 | + allocs += this_allocs; | |
1488 | + wasted += this_allocs * upa - group_cnt[group]; | |
1489 | + } | |
1490 | + | |
1491 | + /* | |
1492 | + * Don't accept if wastage is over 1/3. The | |
1493 | + * greater-than comparison ensures upa==1 always | |
1494 | + * passes the following check. | |
1495 | + */ | |
1496 | + if (wasted > num_possible_cpus() / 3) | |
1497 | + continue; | |
1498 | + | |
1499 | + /* and then don't consume more memory */ | |
1500 | + if (allocs > last_allocs) | |
1501 | + break; | |
1502 | + last_allocs = allocs; | |
1503 | + best_upa = upa; | |
1504 | + } | |
1505 | + upa = best_upa; | |
1506 | + | |
1507 | + /* allocate and fill alloc_info */ | |
1508 | + for (group = 0; group < nr_groups; group++) | |
1509 | + nr_units += roundup(group_cnt[group], upa); | |
1510 | + | |
1511 | + ai = pcpu_alloc_alloc_info(nr_groups, nr_units); | |
1512 | + if (!ai) | |
1513 | + return ERR_PTR(-ENOMEM); | |
1514 | + cpu_map = ai->groups[0].cpu_map; | |
1515 | + | |
1516 | + for (group = 0; group < nr_groups; group++) { | |
1517 | + ai->groups[group].cpu_map = cpu_map; | |
1518 | + cpu_map += roundup(group_cnt[group], upa); | |
1519 | + } | |
1520 | + | |
1521 | + ai->static_size = static_size; | |
1522 | + ai->reserved_size = reserved_size; | |
1523 | + ai->dyn_size = dyn_size; | |
1524 | + ai->unit_size = alloc_size / upa; | |
1525 | + ai->atom_size = atom_size; | |
1526 | + ai->alloc_size = alloc_size; | |
1527 | + | |
1528 | + for (group = 0, unit = 0; group_cnt[group]; group++) { | |
1529 | + struct pcpu_group_info *gi = &ai->groups[group]; | |
1530 | + | |
1531 | + /* | |
1532 | + * Initialize base_offset as if all groups are located | |
1533 | + * back-to-back. The caller should update this to | |
1534 | + * reflect actual allocation. | |
1535 | + */ | |
1536 | + gi->base_offset = unit * ai->unit_size; | |
1537 | + | |
1538 | + for_each_possible_cpu(cpu) | |
1539 | + if (group_map[cpu] == group) | |
1540 | + gi->cpu_map[gi->nr_units++] = cpu; | |
1541 | + gi->nr_units = roundup(gi->nr_units, upa); | |
1542 | + unit += gi->nr_units; | |
1543 | + } | |
1544 | + BUG_ON(unit != nr_units); | |
1545 | + | |
1546 | + return ai; | |
1547 | +} | |
1548 | +#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */ | |
1549 | + | |
1550 | +#if defined(BUILD_EMBED_FIRST_CHUNK) | |
1551 | +/** | |
1521 | 1552 | * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem |
1522 | 1553 | * @reserved_size: the size of reserved percpu area in bytes |
1523 | 1554 | * @dyn_size: minimum free size for dynamic allocation in bytes |
1524 | 1555 | |
... | ... | @@ -1645,10 +1676,9 @@ |
1645 | 1676 | free_bootmem(__pa(areas), areas_size); |
1646 | 1677 | return rc; |
1647 | 1678 | } |
1648 | -#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || | |
1649 | - !CONFIG_HAVE_SETUP_PER_CPU_AREA */ | |
1679 | +#endif /* BUILD_EMBED_FIRST_CHUNK */ | |
1650 | 1680 | |
1651 | -#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | |
1681 | +#ifdef BUILD_PAGE_FIRST_CHUNK | |
1652 | 1682 | /** |
1653 | 1683 | * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages |
1654 | 1684 | * @reserved_size: the size of reserved percpu area in bytes |
1655 | 1685 | |
1656 | 1686 | |
... | ... | @@ -1756,10 +1786,11 @@ |
1756 | 1786 | pcpu_free_alloc_info(ai); |
1757 | 1787 | return rc; |
1758 | 1788 | } |
1759 | -#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | |
1789 | +#endif /* BUILD_PAGE_FIRST_CHUNK */ | |
1760 | 1790 | |
1791 | +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | |
1761 | 1792 | /* |
1762 | - * Generic percpu area setup. | |
1793 | + * Generic SMP percpu area setup. | |
1763 | 1794 | * |
1764 | 1795 | * The embedding helper is used because its behavior closely resembles |
1765 | 1796 | * the original non-dynamic generic percpu area setup. This is |
... | ... | @@ -1770,7 +1801,6 @@ |
1770 | 1801 | * on the physical linear memory mapping which uses large page |
1771 | 1802 | * mappings on applicable archs. |
1772 | 1803 | */ |
1773 | -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | |
1774 | 1804 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; |
1775 | 1805 | EXPORT_SYMBOL(__per_cpu_offset); |
1776 | 1806 | |
1777 | 1807 | |
... | ... | @@ -1799,13 +1829,48 @@ |
1799 | 1829 | PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, |
1800 | 1830 | pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); |
1801 | 1831 | if (rc < 0) |
1802 | - panic("Failed to initialized percpu areas."); | |
1832 | + panic("Failed to initialize percpu areas."); | |
1803 | 1833 | |
1804 | 1834 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
1805 | 1835 | for_each_possible_cpu(cpu) |
1806 | 1836 | __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; |
1807 | 1837 | } |
1808 | -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | |
1838 | +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | |
1839 | + | |
1840 | +#else /* CONFIG_SMP */ | |
1841 | + | |
1842 | +/* | |
1843 | + * UP percpu area setup. | |
1844 | + * | |
1845 | + * UP always uses km-based percpu allocator with identity mapping. | |
1846 | + * Static percpu variables are indistinguishable from the usual static | |
1847 | + * variables and don't require any special preparation. | |
1848 | + */ | |
1849 | +void __init setup_per_cpu_areas(void) | |
1850 | +{ | |
1851 | + const size_t unit_size = | |
1852 | + roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, | |
1853 | + PERCPU_DYNAMIC_RESERVE)); | |
1854 | + struct pcpu_alloc_info *ai; | |
1855 | + void *fc; | |
1856 | + | |
1857 | + ai = pcpu_alloc_alloc_info(1, 1); | |
1858 | + fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | |
1859 | + if (!ai || !fc) | |
1860 | + panic("Failed to allocate memory for percpu areas."); | |
1861 | + | |
1862 | + ai->dyn_size = unit_size; | |
1863 | + ai->unit_size = unit_size; | |
1864 | + ai->atom_size = unit_size; | |
1865 | + ai->alloc_size = unit_size; | |
1866 | + ai->groups[0].nr_units = 1; | |
1867 | + ai->groups[0].cpu_map[0] = 0; | |
1868 | + | |
1869 | + if (pcpu_setup_first_chunk(ai, fc) < 0) | |
1870 | + panic("Failed to initialize percpu areas."); | |
1871 | +} | |
1872 | + | |
1873 | +#endif /* CONFIG_SMP */ | |
1809 | 1874 | |
1810 | 1875 | /* |
1811 | 1876 | * First and reserved chunks are initialized with temporary allocation |
mm/percpu_up.c
1 | -/* | |
2 | - * mm/percpu_up.c - dummy percpu memory allocator implementation for UP | |
3 | - */ | |
4 | - | |
5 | -#include <linux/module.h> | |
6 | -#include <linux/percpu.h> | |
7 | -#include <linux/slab.h> | |
8 | - | |
9 | -void __percpu *__alloc_percpu(size_t size, size_t align) | |
10 | -{ | |
11 | - /* | |
12 | - * Can't easily make larger alignment work with kmalloc. WARN | |
13 | - * on it. Larger alignment should only be used for module | |
14 | - * percpu sections on SMP for which this path isn't used. | |
15 | - */ | |
16 | - WARN_ON_ONCE(align > SMP_CACHE_BYTES); | |
17 | - return (void __percpu __force *)kzalloc(size, GFP_KERNEL); | |
18 | -} | |
19 | -EXPORT_SYMBOL_GPL(__alloc_percpu); | |
20 | - | |
21 | -void free_percpu(void __percpu *p) | |
22 | -{ | |
23 | - kfree(this_cpu_ptr(p)); | |
24 | -} | |
25 | -EXPORT_SYMBOL_GPL(free_percpu); | |
26 | - | |
27 | -phys_addr_t per_cpu_ptr_to_phys(void *addr) | |
28 | -{ | |
29 | - return __pa(addr); | |
30 | -} |
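Removing this UP shim is more than cleanup: the kzalloc-based implementation warned on alignments larger than SMP_CACHE_BYTES, while the real allocator that now also serves UP honours any alignment up to PAGE_SIZE (the documented maximum). A hypothetical caller that benefits is sketched below; my_buf and my_buf_init are made-up names.

#include <linux/percpu.h>

/* Page-aligned dynamic percpu memory now behaves the same on UP and SMP. */
static void __percpu *my_buf;

static int my_buf_init(void)
{
	my_buf = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
	return my_buf ? 0 : -ENOMEM;
}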
mm/vmalloc.c
... | ... | @@ -2065,6 +2065,7 @@ |
2065 | 2065 | } |
2066 | 2066 | EXPORT_SYMBOL_GPL(free_vm_area); |
2067 | 2067 | |
2068 | +#ifdef CONFIG_SMP | |
2068 | 2069 | static struct vmap_area *node_to_va(struct rb_node *n) |
2069 | 2070 | { |
2070 | 2071 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; |
... | ... | @@ -2345,6 +2346,7 @@ |
2345 | 2346 | free_vm_area(vms[i]); |
2346 | 2347 | kfree(vms); |
2347 | 2348 | } |
2349 | +#endif /* CONFIG_SMP */ | |
2348 | 2350 | |
2349 | 2351 | #ifdef CONFIG_PROC_FS |
2350 | 2352 | static void *s_start(struct seq_file *m, loff_t *pos) |