Commit de03c72cfce5b263a674d04348b58475ec50163c

Authored by KOSAKI Motohiro
Committed by Linus Torvalds
1 parent 692e0b3542

mm: convert mm->cpu_vm_cpumask into cpumask_var_t

cpumask_t is a very big struct, and cpu_vm_mask sits at a poor position
within mm_struct. This may reduce the cache hit ratio.

This patch makes two changes:
1) Move the cpumask to the end of mm_struct, because usually only the
   front bits of a cpumask are accessed when the system has cpu-hotplug
   capability.
2) Convert cpu_vm_mask into cpumask_var_t (sketched just below). This may
   help reduce the memory footprint once cpumask_size() uses
   nr_cpumask_bits properly in the future.
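
For reference, and not part of this patch: cpumask_var_t is defined in
<linux/cpumask.h> roughly as

    #ifdef CONFIG_CPUMASK_OFFSTACK
    typedef struct cpumask *cpumask_var_t;   /* heap-allocated at runtime */
    #else
    typedef struct cpumask cpumask_var_t[1]; /* embedded; decays to a pointer */
    #endif

so once cpumask_size() is based on nr_cpumask_bits rather than NR_CPUS,
off-stack masks shrink with the number of possible CPUs.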

In addition, this patch renames cpu_vm_mask to cpu_vm_mask_var, which may
help detect out-of-tree cpu_vm_mask users.

This patch introduces no functional change.

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 7 changed files with 44 additions and 9 deletions

Documentation/cachetlb.txt
... ... @@ -16,7 +16,7 @@
16 16 thinking SMP cache/tlb flushing must be so inefficient, this is in
17 17 fact an area where many optimizations are possible. For example,
18 18 if it can be proven that a user address space has never executed
19   -on a cpu (see vma->cpu_vm_mask), one need not perform a flush
  19 +on a cpu (see mm_cpumask()), one need not perform a flush
20 20 for this address space on that cpu.
21 21  
22 22 First, the TLB flushing interfaces, since they are the simplest. The
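
Illustration, not from the patch: the optimization the documentation
describes boils down to a check like

    /* Hypothetical flush path: skip CPUs this mm never ran on. */
    if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
        return;                 /* nothing to flush on this cpu */
    local_flush_tlb_mm(mm);     /* stand-in for an arch-specific flush */

where cpumask_test_cpu() is the real bitmap helper and
local_flush_tlb_mm() is a hypothetical name for the arch-specific flush.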
arch/x86/kernel/tboot.c
... ... @@ -110,7 +110,6 @@
110 110 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
111 111 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
112 112 .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
113   - .cpu_vm_mask = CPU_MASK_ALL,
114 113 };
115 114  
116 115 static inline void switch_to_tboot_pt(void)
include/linux/mm_types.h
... ... @@ -265,8 +265,6 @@
265 265  
266 266 struct linux_binfmt *binfmt;
267 267  
268   - cpumask_t cpu_vm_mask;
269   -
270 268 /* Architecture-specific MM context */
271 269 mm_context_t context;
272 270  
273 271  
... ... @@ -316,10 +314,15 @@
316 314 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
317 315 pgtable_t pmd_huge_pte; /* protected by page_table_lock */
318 316 #endif
  317 +
  318 + cpumask_var_t cpu_vm_mask_var;
319 319 };
320 320  
321 321 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
322   -#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
  322 +static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
  323 +{
  324 + return mm->cpu_vm_mask_var;
  325 +}
323 326  
324 327 #endif /* _LINUX_MM_TYPES_H */
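
For context: callers already reach the mask through the mm_cpumask()
accessor rather than the field itself, e.g. a typical switch_mm()
implementation marks the incoming mm as live on the current CPU with

    cpumask_set_cpu(cpu, mm_cpumask(next)); /* next: the mm being switched to */

which is what lets this patch move and rename the field without touching
arch code.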
include/linux/sched.h
... ... @@ -2176,6 +2176,7 @@
2176 2176 if (unlikely(atomic_dec_and_test(&mm->mm_count)))
2177 2177 __mmdrop(mm);
2178 2178 }
  2179 +extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm);
2179 2180  
2180 2181 /* mmput gets rid of the mappings and all user-space */
2181 2182 extern void mmput(struct mm_struct *);
init/main.c
... ... @@ -509,6 +509,8 @@
509 509 sort_main_extable();
510 510 trap_init();
511 511 mm_init();
  512 + BUG_ON(mm_init_cpumask(&init_mm, 0));
  513 +
512 514 /*
513 515 * Set up the scheduler prior starting any interrupts (such as the
514 516 * timer interrupt). Full topology setup happens at smp_init()
kernel/fork.c
... ... @@ -485,6 +485,20 @@
485 485 #endif
486 486 }
487 487  
  488 +int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
  489 +{
  490 +#ifdef CONFIG_CPUMASK_OFFSTACK
  491 + if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
  492 + return -ENOMEM;
  493 +
  494 + if (oldmm)
  495 + cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
  496 + else
  497 + memset(mm_cpumask(mm), 0, cpumask_size());
  498 +#endif
  499 + return 0;
  500 +}
  501 +
488 502 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
489 503 {
490 504 atomic_set(&mm->mm_users, 1);
491 505  
... ... @@ -521,10 +535,20 @@
521 535 struct mm_struct * mm;
522 536  
523 537 mm = allocate_mm();
524   - if (mm) {
525   - memset(mm, 0, sizeof(*mm));
526   - mm = mm_init(mm, current);
  538 + if (!mm)
  539 + return NULL;
  540 +
  541 + memset(mm, 0, sizeof(*mm));
  542 + mm = mm_init(mm, current);
  543 + if (!mm)
  544 + return NULL;
  545 +
  546 + if (mm_init_cpumask(mm, NULL)) {
  547 + mm_free_pgd(mm);
  548 + free_mm(mm);
  549 + return NULL;
527 550 }
  551 +
528 552 return mm;
529 553 }
530 554  
... ... @@ -536,6 +560,7 @@
536 560 void __mmdrop(struct mm_struct *mm)
537 561 {
538 562 BUG_ON(mm == &init_mm);
  563 + free_cpumask_var(mm->cpu_vm_mask_var);
539 564 mm_free_pgd(mm);
540 565 destroy_context(mm);
541 566 mmu_notifier_mm_destroy(mm);
... ... @@ -690,6 +715,9 @@
690 715 if (!mm_init(mm, tsk))
691 716 goto fail_nomem;
692 717  
  718 + if (mm_init_cpumask(mm, oldmm))
  719 + goto fail_nocpumask;
  720 +
693 721 if (init_new_context(tsk, mm))
694 722 goto fail_nocontext;
695 723  
... ... @@ -716,6 +744,9 @@
716 744 return NULL;
717 745  
718 746 fail_nocontext:
  747 + free_cpumask_var(mm->cpu_vm_mask_var);
  748 +
  749 +fail_nocpumask:
719 750 /*
720 751 * If init_new_context() failed, we cannot use mmput() to free the mm
721 752 * because it calls destroy_context()
mm/init-mm.c
... ... @@ -21,7 +21,6 @@
21 21 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
22 22 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
23 23 .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
24   - .cpu_vm_mask = CPU_MASK_ALL,
25 24 INIT_MM_CONTEXT(init_mm)
26 25 };
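
The cpumask_var_t calls introduced above follow the standard
allocate/copy/free pattern from <linux/cpumask.h>; a minimal sketch,
outside any real kernel path:

    cpumask_var_t mask;

    if (!alloc_cpumask_var(&mask, GFP_KERNEL)) /* real allocation only if CONFIG_CPUMASK_OFFSTACK */
        return -ENOMEM;
    cpumask_copy(mask, cpu_online_mask);       /* e.g. start from the online map */
    /* ... use mask ... */
    free_cpumask_var(mask);                    /* must pair with a successful alloc */

Every successful alloc_cpumask_var() needs a matching free_cpumask_var(),
which is why __mmdrop() and the dup_mm() error path gain one above.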