Commit c3b86a29429dac1033e3f602f51fa8d00006a8eb

Authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-32, percpu: Correct the ordering of the percpu readmostly section
  x86, mm: Enable ARCH_DMA_ADDR_T_64BIT with X86_64 || HIGHMEM64G
  x86: Spread tlb flush vector between nodes
  percpu: Introduce a read-mostly percpu API
  x86, mm: Fix incorrect data type in vmalloc_sync_all()
  x86, mm: Hold mm->page_table_lock while doing vmalloc_sync
  x86, mm: Fix bogus whitespace in sync_global_pgds()
  x86-32: Fix sparse warning for the __PHYSICAL_MASK calculation
  x86, mm: Add RESERVE_BRK_ARRAY() helper
  mm, x86: Saving vmcore with non-lazy freeing of vmas
  x86, kdump: Change copy_oldmem_page() to use cached addressing
  x86, mm: fix uninitialized addr in kernel_physical_mapping_init()
  x86, kmemcheck: Remove double test
  x86, mm: Make spurious_fault check explicitly check the PRESENT bit
  x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes
  x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions
  x86, mm: Avoid unnecessary TLB flush

Showing 17 changed files

arch/x86/Kconfig
... ... @@ -1163,6 +1163,9 @@
1163 1163 config ARCH_PHYS_ADDR_T_64BIT
1164 1164 def_bool X86_64 || X86_PAE
1165 1165  
  1166 +config ARCH_DMA_ADDR_T_64BIT
  1167 + def_bool X86_64 || HIGHMEM64G
  1168 +
1166 1169 config DIRECT_GBPAGES
1167 1170 bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
1168 1171 default y
arch/x86/include/asm/io.h
... ... @@ -206,6 +206,7 @@
206 206  
207 207 extern void iounmap(volatile void __iomem *addr);
208 208  
  209 +extern void set_iounmap_nonlazy(void);
209 210  
210 211 #ifdef __KERNEL__
211 212  
arch/x86/include/asm/page_types.h
... ... @@ -8,7 +8,7 @@
8 8 #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
9 9 #define PAGE_MASK (~(PAGE_SIZE-1))
10 10  
11   -#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
  11 +#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
12 12 #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
13 13  
14 14 /* Cast PAGE_MASK to a signed type so that it is sign-extended if
arch/x86/include/asm/pgtable.h
... ... @@ -28,6 +28,8 @@
28 28 extern spinlock_t pgd_lock;
29 29 extern struct list_head pgd_list;
30 30  
  31 +extern struct mm_struct *pgd_page_get_mm(struct page *page);
  32 +
31 33 #ifdef CONFIG_PARAVIRT
32 34 #include <asm/paravirt.h>
33 35 #else /* !CONFIG_PARAVIRT */
... ... @@ -602,6 +604,8 @@
602 604 clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
603 605 pte_update(mm, addr, ptep);
604 606 }
  607 +
  608 +#define flush_tlb_fix_spurious_fault(vma, address)
605 609  
606 610 /*
607 611 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
arch/x86/include/asm/pgtable_64.h
... ... @@ -102,6 +102,8 @@
102 102 native_set_pgd(pgd, native_make_pgd(0));
103 103 }
104 104  
  105 +extern void sync_global_pgds(unsigned long start, unsigned long end);
  106 +
105 107 /*
106 108 * Conversion functions: convert a page and protection to a page entry,
107 109 * and a page entry and page directory to the page they refer to.
arch/x86/include/asm/setup.h
... ... @@ -93,6 +93,11 @@
93 93 : : "i" (sz)); \
94 94 }
95 95  
  96 +/* Helper for reserving space for arrays of things */
  97 +#define RESERVE_BRK_ARRAY(type, name, entries) \
  98 + type *name; \
  99 + RESERVE_BRK(name, sizeof(type) * entries)
  100 +
96 101 #ifdef __i386__
97 102  
98 103 void __init i386_start_kernel(void);
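For context on the new RESERVE_BRK_ARRAY() helper above, a minimal usage sketch follows; the array name, element count, and init function are hypothetical, not part of this series. The macro declares a pointer and reserves matching brk space; early boot code then points the pointer at memory obtained with extend_brk():

	/* File scope: declare a pointer and reserve brk space for 16 pmd_t */
	RESERVE_BRK_ARRAY(pmd_t, example_pmds, 16);

	static void __init example_setup(void)
	{
		/* Populate the pointer from the brk area during early boot */
		example_pmds = extend_brk(sizeof(pmd_t) * 16, PAGE_SIZE);
		memset(example_pmds, 0, sizeof(pmd_t) * 16);
	}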
arch/x86/kernel/crash_dump_64.c
... ... @@ -34,7 +34,7 @@
34 34 if (!csize)
35 35 return 0;
36 36  
37   - vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
  37 + vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
38 38 if (!vaddr)
39 39 return -ENOMEM;
40 40  
... ... @@ -46,6 +46,7 @@
46 46 } else
47 47 memcpy(buf, vaddr + offset, csize);
48 48  
  49 + set_iounmap_nonlazy();
49 50 iounmap(vaddr);
50 51 return csize;
51 52 }
arch/x86/mm/fault.c
... ... @@ -229,7 +229,16 @@
229 229  
230 230 spin_lock_irqsave(&pgd_lock, flags);
231 231 list_for_each_entry(page, &pgd_list, lru) {
232   - if (!vmalloc_sync_one(page_address(page), address))
  232 + spinlock_t *pgt_lock;
  233 + pmd_t *ret;
  234 +
  235 + pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
  236 +
  237 + spin_lock(pgt_lock);
  238 + ret = vmalloc_sync_one(page_address(page), address);
  239 + spin_unlock(pgt_lock);
  240 +
  241 + if (!ret)
233 242 break;
234 243 }
235 244 spin_unlock_irqrestore(&pgd_lock, flags);
... ... @@ -328,29 +337,7 @@
328 337  
329 338 void vmalloc_sync_all(void)
330 339 {
331   - unsigned long address;
332   -
333   - for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
334   - address += PGDIR_SIZE) {
335   -
336   - const pgd_t *pgd_ref = pgd_offset_k(address);
337   - unsigned long flags;
338   - struct page *page;
339   -
340   - if (pgd_none(*pgd_ref))
341   - continue;
342   -
343   - spin_lock_irqsave(&pgd_lock, flags);
344   - list_for_each_entry(page, &pgd_list, lru) {
345   - pgd_t *pgd;
346   - pgd = (pgd_t *)page_address(page) + pgd_index(address);
347   - if (pgd_none(*pgd))
348   - set_pgd(pgd, *pgd_ref);
349   - else
350   - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
351   - }
352   - spin_unlock_irqrestore(&pgd_lock, flags);
353   - }
  340 + sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
354 341 }
355 342  
356 343 /*
357 344  
... ... @@ -898,8 +885,14 @@
898 885 if (pmd_large(*pmd))
899 886 return spurious_fault_check(error_code, (pte_t *) pmd);
900 887  
  888 + /*
  889 + * Note: don't use pte_present() here, since it returns true
  890 + * if the _PAGE_PROTNONE bit is set. However, this aliases the
  891 + * _PAGE_GLOBAL bit, which for kernel pages give false positives
  892 + * when CONFIG_DEBUG_PAGEALLOC is used.
  893 + */
901 894 pte = pte_offset_kernel(pmd, address);
902   - if (!pte_present(*pte))
  895 + if (!(pte_flags(*pte) & _PAGE_PRESENT))
903 896 return 0;
904 897  
905 898 ret = spurious_fault_check(error_code, pte);
arch/x86/mm/init_64.c
... ... @@ -98,6 +98,43 @@
98 98 __setup("noexec32=", nonx32_setup);
99 99  
100 100 /*
  101 + * When memory was added/removed make sure all the processes MM have
  102 + * suitable PGD entries in the local PGD level page.
  103 + */
  104 +void sync_global_pgds(unsigned long start, unsigned long end)
  105 +{
  106 + unsigned long address;
  107 +
  108 + for (address = start; address <= end; address += PGDIR_SIZE) {
  109 + const pgd_t *pgd_ref = pgd_offset_k(address);
  110 + unsigned long flags;
  111 + struct page *page;
  112 +
  113 + if (pgd_none(*pgd_ref))
  114 + continue;
  115 +
  116 + spin_lock_irqsave(&pgd_lock, flags);
  117 + list_for_each_entry(page, &pgd_list, lru) {
  118 + pgd_t *pgd;
  119 + spinlock_t *pgt_lock;
  120 +
  121 + pgd = (pgd_t *)page_address(page) + pgd_index(address);
  122 + pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
  123 + spin_lock(pgt_lock);
  124 +
  125 + if (pgd_none(*pgd))
  126 + set_pgd(pgd, *pgd_ref);
  127 + else
  128 + BUG_ON(pgd_page_vaddr(*pgd)
  129 + != pgd_page_vaddr(*pgd_ref));
  130 +
  131 + spin_unlock(pgt_lock);
  132 + }
  133 + spin_unlock_irqrestore(&pgd_lock, flags);
  134 + }
  135 +}
  136 +
  137 +/*
101 138 * NOTE: This function is marked __ref because it calls __init function
102 139 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
103 140 */
104 141  
105 142  
... ... @@ -534,11 +571,13 @@
534 571 unsigned long end,
535 572 unsigned long page_size_mask)
536 573 {
537   -
  574 + bool pgd_changed = false;
538 575 unsigned long next, last_map_addr = end;
  576 + unsigned long addr;
539 577  
540 578 start = (unsigned long)__va(start);
541 579 end = (unsigned long)__va(end);
  580 + addr = start;
542 581  
543 582 for (; start < end; start = next) {
544 583 pgd_t *pgd = pgd_offset_k(start);
545 584  
... ... @@ -563,7 +602,12 @@
563 602 spin_lock(&init_mm.page_table_lock);
564 603 pgd_populate(&init_mm, pgd, __va(pud_phys));
565 604 spin_unlock(&init_mm.page_table_lock);
  605 + pgd_changed = true;
566 606 }
  607 +
  608 + if (pgd_changed)
  609 + sync_global_pgds(addr, end);
  610 +
567 611 __flush_tlb_all();
568 612  
569 613 return last_map_addr;
... ... @@ -1003,6 +1047,7 @@
1003 1047 }
1004 1048  
1005 1049 }
  1050 + sync_global_pgds((unsigned long)start_page, end);
1006 1051 return 0;
1007 1052 }
1008 1053  
arch/x86/mm/kmemcheck/opcode.c
... ... @@ -9,7 +9,7 @@
9 9 b == 0xf0 || b == 0xf2 || b == 0xf3
10 10 /* Group 2 */
11 11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
12   - || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e
  12 + || b == 0x64 || b == 0x65
13 13 /* Group 3 */
14 14 || b == 0x66
15 15 /* Group 4 */
arch/x86/mm/pgtable.c
... ... @@ -87,8 +87,20 @@
87 87 #define UNSHARED_PTRS_PER_PGD \
88 88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
89 89  
90   -static void pgd_ctor(pgd_t *pgd)
  90 +
  91 +static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
91 92 {
  93 + BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
  94 + virt_to_page(pgd)->index = (pgoff_t)mm;
  95 +}
  96 +
  97 +struct mm_struct *pgd_page_get_mm(struct page *page)
  98 +{
  99 + return (struct mm_struct *)page->index;
  100 +}
  101 +
  102 +static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
  103 +{
92 104 /* If the pgd points to a shared pagetable level (either the
93 105 ptes in non-PAE, or shared PMD in PAE), then just copy the
94 106 references from swapper_pg_dir. */
95 107  
... ... @@ -105,8 +117,10 @@
105 117 }
106 118  
107 119 /* list required to sync kernel mapping updates */
108   - if (!SHARED_KERNEL_PMD)
  120 + if (!SHARED_KERNEL_PMD) {
  121 + pgd_set_mm(pgd, mm);
109 122 pgd_list_add(pgd);
  123 + }
110 124 }
111 125  
112 126 static void pgd_dtor(pgd_t *pgd)
... ... @@ -272,7 +286,7 @@
272 286 */
273 287 spin_lock_irqsave(&pgd_lock, flags);
274 288  
275   - pgd_ctor(pgd);
  289 + pgd_ctor(mm, pgd);
276 290 pgd_prepopulate_pmd(mm, pgd, pmds);
277 291  
278 292 spin_unlock_irqrestore(&pgd_lock, flags);
arch/x86/mm/tlb.c
... ... @@ -5,6 +5,7 @@
5 5 #include <linux/smp.h>
6 6 #include <linux/interrupt.h>
7 7 #include <linux/module.h>
  8 +#include <linux/cpu.h>
8 9  
9 10 #include <asm/tlbflush.h>
10 11 #include <asm/mmu_context.h>
... ... @@ -52,6 +53,8 @@
52 53 want false sharing in the per cpu data segment. */
53 54 static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
54 55  
  56 +static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
  57 +
55 58 /*
56 59 * We cannot call mmdrop() because we are in interrupt context,
57 60 * instead update mm->cpu_vm_mask.
... ... @@ -173,7 +176,7 @@
173 176 union smp_flush_state *f;
174 177  
175 178 /* Caller has disabled preemption */
176   - sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
  179 + sender = this_cpu_read(tlb_vector_offset);
177 180 f = &flush_state[sender];
178 181  
179 182 /*
... ... @@ -218,6 +221,47 @@
218 221 flush_tlb_others_ipi(cpumask, mm, va);
219 222 }
220 223  
  224 +static void __cpuinit calculate_tlb_offset(void)
  225 +{
  226 + int cpu, node, nr_node_vecs;
  227 + /*
  228 + * we are changing tlb_vector_offset for each CPU in runtime, but this
  229 + * will not cause inconsistency, as the write is atomic under X86. we
  230 + * might see more lock contentions in a short time, but after all CPU's
  231 + * tlb_vector_offset are changed, everything should go normal
  232 + *
  233 + * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
  234 + * waste some vectors.
  235 + **/
  236 + if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
  237 + nr_node_vecs = 1;
  238 + else
  239 + nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
  240 +
  241 + for_each_online_node(node) {
  242 + int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
  243 + nr_node_vecs;
  244 + int cpu_offset = 0;
  245 + for_each_cpu(cpu, cpumask_of_node(node)) {
  246 + per_cpu(tlb_vector_offset, cpu) = node_offset +
  247 + cpu_offset;
  248 + cpu_offset++;
  249 + cpu_offset = cpu_offset % nr_node_vecs;
  250 + }
  251 + }
  252 +}
  253 +
  254 +static int tlb_cpuhp_notify(struct notifier_block *n,
  255 + unsigned long action, void *hcpu)
  256 +{
  257 + switch (action & 0xf) {
  258 + case CPU_ONLINE:
  259 + case CPU_DEAD:
  260 + calculate_tlb_offset();
  261 + }
  262 + return NOTIFY_OK;
  263 +}
  264 +
221 265 static int __cpuinit init_smp_flush(void)
222 266 {
223 267 int i;
... ... @@ -225,6 +269,8 @@
225 269 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226 270 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 271  
  272 + calculate_tlb_offset();
  273 + hotcpu_notifier(tlb_cpuhp_notify, 0);
228 274 return 0;
229 275 }
230 276 core_initcall(init_smp_flush);
include/asm-generic/pgtable.h
... ... @@ -129,6 +129,10 @@
129 129 #define move_pte(pte, prot, old_addr, new_addr) (pte)
130 130 #endif
131 131  
  132 +#ifndef flush_tlb_fix_spurious_fault
  133 +#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
  134 +#endif
  135 +
132 136 #ifndef pgprot_noncached
133 137 #define pgprot_noncached(prot) (prot)
134 138 #endif
include/asm-generic/vmlinux.lds.h
... ... @@ -687,7 +687,9 @@
687 687 - LOAD_OFFSET) { \
688 688 VMLINUX_SYMBOL(__per_cpu_start) = .; \
689 689 *(.data..percpu..first) \
  690 + . = ALIGN(PAGE_SIZE); \
690 691 *(.data..percpu..page_aligned) \
  692 + *(.data..percpu..readmostly) \
691 693 *(.data..percpu) \
692 694 *(.data..percpu..shared_aligned) \
693 695 VMLINUX_SYMBOL(__per_cpu_end) = .; \
694 696  
... ... @@ -713,7 +715,9 @@
713 715 VMLINUX_SYMBOL(__per_cpu_load) = .; \
714 716 VMLINUX_SYMBOL(__per_cpu_start) = .; \
715 717 *(.data..percpu..first) \
  718 + . = ALIGN(PAGE_SIZE); \
716 719 *(.data..percpu..page_aligned) \
  720 + *(.data..percpu..readmostly) \
717 721 *(.data..percpu) \
718 722 *(.data..percpu..shared_aligned) \
719 723 VMLINUX_SYMBOL(__per_cpu_end) = .; \
include/linux/percpu-defs.h
... ... @@ -139,6 +139,15 @@
139 139 __aligned(PAGE_SIZE)
140 140  
141 141 /*
  142 + * Declaration/definition used for per-CPU variables that must be read mostly.
  143 + */
  144 +#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
  145 + DECLARE_PER_CPU_SECTION(type, name, "..readmostly")
  146 +
  147 +#define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
  148 + DEFINE_PER_CPU_SECTION(type, name, "..readmostly")
  149 +
  150 +/*
142 151 * Intermodule exports for per-CPU variables. sparse forgets about
143 152 * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to
144 153 * noop if __CHECKER__.
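For context on the read-mostly percpu API introduced above, a minimal usage sketch follows; the variable and function names are hypothetical (the in-tree user added by this series is tlb_vector_offset in arch/x86/mm/tlb.c). Variables defined this way land in .data..percpu..readmostly, keeping frequently read, rarely written per-CPU data away from per-CPU data that is written on hot paths:

	#include <linux/percpu.h>

	/* Hypothetical per-CPU value: written rarely, read on hot paths */
	static DEFINE_PER_CPU_READ_MOSTLY(int, example_node_hint);

	static int example_get_hint(void)
	{
		/* Cheap read of this CPU's copy */
		return this_cpu_read(example_node_hint);
	}

	static void example_set_hint(int cpu, int hint)
	{
		/* Infrequent update, e.g. from a CPU hotplug notifier */
		per_cpu(example_node_hint, cpu) = hint;
	}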
mm/memory.c
... ... @@ -3185,7 +3185,7 @@
3185 3185 * with threads.
3186 3186 */
3187 3187 if (flags & FAULT_FLAG_WRITE)
3188   - flush_tlb_page(vma, address);
  3188 + flush_tlb_fix_spurious_fault(vma, address);
3189 3189 }
3190 3190 unlock:
3191 3191 pte_unmap_unlock(pte, ptl);
mm/vmalloc.c
... ... @@ -517,6 +517,15 @@
517 517 static void purge_fragmented_blocks_allcpus(void);
518 518  
519 519 /*
  520 + * called before a call to iounmap() if the caller wants vm_area_struct's
  521 + * immediately freed.
  522 + */
  523 +void set_iounmap_nonlazy(void)
  524 +{
  525 + atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
  526 +}
  527 +
  528 +/*
520 529 * Purges all lazily-freed vmap areas.
521 530 *
522 531 * If sync is 0 then don't purge if there is already a purge in progress.