Commit 46bbffad54bd48bb809f2691c1970a79a588976b

Authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, mm: Unify kernel_physical_mapping_init() API
  x86, mm: Allow highmem user page tables to be disabled at boot time
  x86: Do not reserve brk for DMI if it's not going to be used
  x86: Convert tlbstate_lock to raw_spinlock
  x86: Use the generic page_is_ram()
  x86: Remove BIOS data range from e820
  Move page_is_ram() declaration to mm.h
  Generic page_is_ram: use __weak
  resources: introduce generic page_is_ram()

Showing 14 changed files

Documentation/kernel-parameters.txt
... ... @@ -2718,6 +2718,13 @@
2718 2718 medium is write-protected).
2719 2719 Example: quirks=0419:aaf5:rl,0421:0433:rc
2720 2720  
  2721 + userpte=
  2722 + [X86] Flags controlling user PTE allocations.
  2723 +
  2724 + nohigh = do not allocate PTE pages in
  2725 + HIGHMEM regardless of setting
  2726 + of CONFIG_HIGHPTE.
  2727 +
2721 2728 vdso= [X86,SH]
2722 2729 vdso=2: enable compat VDSO (default with COMPAT_VDSO)
2723 2730 vdso=1: enable VDSO (default)
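A note on usage: this is a plain kernel command-line flag. A minimal, hypothetical boot entry (the image name and root device are placeholders; only userpte=nohigh comes from this change):

    kernel /boot/vmlinuz-2.6.33 root=/dev/sda1 ro userpte=nohigh

With the flag set, user page-table pages are allocated from lowmem even when CONFIG_HIGHPTE is enabled.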
arch/mips/mm/init.c
... ... @@ -298,7 +298,7 @@
298 298 }
299 299  
300 300 #ifndef CONFIG_NEED_MULTIPLE_NODES
301   -static int __init page_is_ram(unsigned long pagenr)
  301 +int page_is_ram(unsigned long pagenr)
302 302 {
303 303 int i;
304 304  
arch/score/mm/init.c
... ... @@ -59,7 +59,7 @@
59 59 }
60 60  
61 61 #ifndef CONFIG_NEED_MULTIPLE_NODES
62   -static int __init page_is_ram(unsigned long pagenr)
  62 +int page_is_ram(unsigned long pagenr)
63 63 {
64 64 if (pagenr >= min_low_pfn && pagenr < max_low_pfn)
65 65 return 1;
arch/x86/include/asm/page_types.h
... ... @@ -40,7 +40,6 @@
40 40  
41 41 #ifndef __ASSEMBLY__
42 42  
43   -extern int page_is_ram(unsigned long pagenr);
44 43 extern int devmem_is_allowed(unsigned long pagenr);
45 44  
46 45 extern unsigned long max_low_pfn_mapped;
arch/x86/include/asm/pgalloc.h
... ... @@ -23,6 +23,11 @@
23 23 #endif
24 24  
25 25 /*
  26 + * Flags to use when allocating a user page table page.
  27 + */
  28 +extern gfp_t __userpte_alloc_gfp;
  29 +
  30 +/*
26 31 * Allocate and free page tables.
27 32 */
28 33 extern pgd_t *pgd_alloc(struct mm_struct *);
arch/x86/kernel/e820.c
... ... @@ -517,10 +517,18 @@
517 517 int checktype)
518 518 {
519 519 int i;
  520 + u64 end;
520 521 u64 real_removed_size = 0;
521 522  
522 523 if (size > (ULLONG_MAX - start))
523 524 size = ULLONG_MAX - start;
  525 +
  526 + end = start + size;
  527 + printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
  528 + (unsigned long long) start,
  529 + (unsigned long long) end);
  530 + e820_print_type(old_type);
  531 + printk(KERN_CONT "\n");
524 532  
525 533 for (i = 0; i < e820.nr_map; i++) {
526 534 struct e820entry *ei = &e820.map[i];
arch/x86/kernel/setup.c
... ... @@ -121,7 +121,9 @@
121 121 unsigned long max_low_pfn_mapped;
122 122 unsigned long max_pfn_mapped;
123 123  
  124 +#ifdef CONFIG_DMI
124 125 RESERVE_BRK(dmi_alloc, 65536);
  126 +#endif
125 127  
126 128 unsigned int boot_cpu_id __read_mostly;
127 129  
... ... @@ -667,6 +669,23 @@
667 669 {}
668 670 };
669 671  
  672 +static void __init trim_bios_range(void)
  673 +{
  674 + /*
  675 + * A special case is the first 4Kb of memory;
  676 + * This is a BIOS owned area, not kernel ram, but generally
  677 + * not listed as such in the E820 table.
  678 + */
  679 + e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
  680 + /*
  681 + * special case: Some BIOSen report the PC BIOS
  682 + * area (640->1Mb) as ram even though it is not.
  683 + * take them out.
  684 + */
  685 + e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
  686 + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  687 +}
  688 +
670 689 /*
671 690 * Determine if we were loaded by an EFI loader. If so, then we have also been
672 691 * passed the efi memmap, systab, etc., so we should use these data structures
... ... @@ -830,7 +849,7 @@
830 849 insert_resource(&iomem_resource, &data_resource);
831 850 insert_resource(&iomem_resource, &bss_resource);
832 851  
833   -
  852 + trim_bios_range();
834 853 #ifdef CONFIG_X86_32
835 854 if (ppro_with_ram_bug()) {
836 855 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
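For reference when reading trim_bios_range() above: BIOS_BEGIN and BIOS_END are pre-existing x86 constants (believed to be defined in arch/x86/include/asm/e820.h in this era), roughly:

    #define BIOS_BEGIN  0x000a0000   /* 640K, start of the legacy VGA/BIOS hole */
    #define BIOS_END    0x00100000   /* 1MB */

so the e820_remove_range() call strips the 640K-1MB window out of any E820_RAM entries that wrongly cover it.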
arch/x86/mm/init.c
... ... @@ -266,16 +266,9 @@
266 266 if (!after_bootmem)
267 267 find_early_table_space(end, use_pse, use_gbpages);
268 268  
269   -#ifdef CONFIG_X86_32
270 269 for (i = 0; i < nr_range; i++)
271   - kernel_physical_mapping_init(mr[i].start, mr[i].end,
272   - mr[i].page_size_mask);
273   - ret = end;
274   -#else /* CONFIG_X86_64 */
275   - for (i = 0; i < nr_range; i++)
276 270 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
277 271 mr[i].page_size_mask);
278   -#endif
279 272  
280 273 #ifdef CONFIG_X86_32
281 274 early_ioremap_page_table_range_init();
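The hunk above is the payoff of "x86, mm: Unify kernel_physical_mapping_init() API": the caller no longer needs a CONFIG_X86_32/CONFIG_X86_64 split because both variants now return the last address they mapped. A sketch of the shared prototype implied by these hunks (parameter names assumed):

    /* provided by both arch/x86/mm/init_32.c and init_64.c; returns the
       last physical address that was actually mapped */
    unsigned long kernel_physical_mapping_init(unsigned long start,
                                               unsigned long end,
                                               unsigned long page_size_mask);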
arch/x86/mm/init_32.c
... ... @@ -241,6 +241,7 @@
241 241 unsigned long page_size_mask)
242 242 {
243 243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
  244 + unsigned long last_map_addr = end;
244 245 unsigned long start_pfn, end_pfn;
245 246 pgd_t *pgd_base = swapper_pg_dir;
246 247 int pgd_idx, pmd_idx, pte_ofs;
247 248  
... ... @@ -341,9 +342,10 @@
341 342 prot = PAGE_KERNEL_EXEC;
342 343  
343 344 pages_4k++;
344   - if (mapping_iter == 1)
  345 + if (mapping_iter == 1) {
345 346 set_pte(pte, pfn_pte(pfn, init_prot));
346   - else
  347 + last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
  348 + } else
347 349 set_pte(pte, pfn_pte(pfn, prot));
348 350 }
349 351 }
... ... @@ -368,7 +370,7 @@
368 370 mapping_iter = 2;
369 371 goto repeat;
370 372 }
371   - return 0;
  373 + return last_map_addr;
372 374 }
373 375  
374 376 pte_t *kmap_pte;
arch/x86/mm/ioremap.c
... ... @@ -24,43 +24,6 @@
24 24  
25 25 #include "physaddr.h"
26 26  
27   -int page_is_ram(unsigned long pagenr)
28   -{
29   - resource_size_t addr, end;
30   - int i;
31   -
32   - /*
33   - * A special case is the first 4Kb of memory;
34   - * This is a BIOS owned area, not kernel ram, but generally
35   - * not listed as such in the E820 table.
36   - */
37   - if (pagenr == 0)
38   - return 0;
39   -
40   - /*
41   - * Second special case: Some BIOSen report the PC BIOS
42   - * area (640->1Mb) as ram even though it is not.
43   - */
44   - if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
45   - pagenr < (BIOS_END >> PAGE_SHIFT))
46   - return 0;
47   -
48   - for (i = 0; i < e820.nr_map; i++) {
49   - /*
50   - * Not usable memory:
51   - */
52   - if (e820.map[i].type != E820_RAM)
53   - continue;
54   - addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
55   - end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
56   -
57   -
58   - if ((pagenr >= addr) && (pagenr < end))
59   - return 1;
60   - }
61   - return 0;
62   -}
63   -
64 27 /*
65 28 * Fix up the linear direct mapping of the kernel to avoid cache attribute
66 29 * conflicts.
arch/x86/mm/pgtable.c
... ... @@ -6,6 +6,14 @@
6 6  
7 7 #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8 8  
  9 +#ifdef CONFIG_HIGHPTE
  10 +#define PGALLOC_USER_GFP __GFP_HIGHMEM
  11 +#else
  12 +#define PGALLOC_USER_GFP 0
  13 +#endif
  14 +
  15 +gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
  16 +
9 17 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
10 18 {
11 19 return (pte_t *)__get_free_page(PGALLOC_GFP);
12 20  
... ... @@ -15,15 +23,28 @@
15 23 {
16 24 struct page *pte;
17 25  
18   -#ifdef CONFIG_HIGHPTE
19   - pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
20   -#else
21   - pte = alloc_pages(PGALLOC_GFP, 0);
22   -#endif
  26 + pte = alloc_pages(__userpte_alloc_gfp, 0);
23 27 if (pte)
24 28 pgtable_page_ctor(pte);
25 29 return pte;
26 30 }
  31 +
  32 +static int __init setup_userpte(char *arg)
  33 +{
  34 + if (!arg)
  35 + return -EINVAL;
  36 +
  37 + /*
  38 + * "userpte=nohigh" disables allocation of user pagetables in
  39 + * high memory.
  40 + */
  41 + if (strcmp(arg, "nohigh") == 0)
  42 + __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
  43 + else
  44 + return -EINVAL;
  45 + return 0;
  46 +}
  47 +early_param("userpte", setup_userpte);
27 48  
28 49 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
29 50 {
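Background on why a boot-time opt-out is useful: a PTE page allocated with __GFP_HIGHMEM has no permanent kernel mapping, so every kernel access to it goes through a temporary atomic mapping. A rough sketch of that pattern with the 2.6.33-era two-argument kmap_atomic() (essentially what pte_offset_map()/pte_unmap() expand to under CONFIG_HIGHPTE; not code from this commit):

    /* map a possibly-highmem PTE page, read one entry, unmap again */
    struct page *ptepage = pmd_page(*pmd);
    pte_t *ptep = (pte_t *)kmap_atomic(ptepage, KM_PTE0) + pte_index(addr);
    pte_t entry = *ptep;            /* entry is only addressable while mapped */
    kunmap_atomic(ptep, KM_PTE0);

Booting with userpte=nohigh trades a little lowmem for skipping these transient mappings.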
arch/x86/mm/tlb.c
... ... @@ -41,7 +41,7 @@
41 41 struct {
42 42 struct mm_struct *flush_mm;
43 43 unsigned long flush_va;
44   - spinlock_t tlbstate_lock;
  44 + raw_spinlock_t tlbstate_lock;
45 45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
46 46 };
47 47 char pad[INTERNODE_CACHE_BYTES];
... ... @@ -181,7 +181,7 @@
181 181 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
182 182 * probably not worth checking this for a cache-hot lock.
183 183 */
184   - spin_lock(&f->tlbstate_lock);
  184 + raw_spin_lock(&f->tlbstate_lock);
185 185  
186 186 f->flush_mm = mm;
187 187 f->flush_va = va;
... ... @@ -199,7 +199,7 @@
199 199  
200 200 f->flush_mm = NULL;
201 201 f->flush_va = 0;
202   - spin_unlock(&f->tlbstate_lock);
  202 + raw_spin_unlock(&f->tlbstate_lock);
203 203 }
204 204  
205 205 void native_flush_tlb_others(const struct cpumask *cpumask,
... ... @@ -223,7 +223,7 @@
223 223 int i;
224 224  
225 225 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226   - spin_lock_init(&flush_state[i].tlbstate_lock);
  226 + raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 227  
228 228 return 0;
229 229 }
include/linux/mm.h
... ... @@ -265,6 +265,8 @@
265 265 return atomic_inc_not_zero(&page->_count);
266 266 }
267 267  
  268 +extern int page_is_ram(unsigned long pfn);
  269 +
268 270 /* Support for virtually mapped pages */
269 271 struct page *vmalloc_to_page(const void *addr);
270 272 unsigned long vmalloc_to_pfn(const void *addr);
kernel/resource.c
... ... @@ -327,6 +327,19 @@
327 327  
328 328 #endif
329 329  
  330 +static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
  331 +{
  332 + return 1;
  333 +}
  334 +/*
  335 + * This generic page_is_ram() returns true if specified address is
  336 + * registered as "System RAM" in iomem_resource list.
  337 + */
  338 +int __weak page_is_ram(unsigned long pfn)
  339 +{
  340 + return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
  341 +}
  342 +
330 343 /*
331 344 * Find empty slot in the resource tree given range and alignment.
332 345 */
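The __weak above is what lets architectures such as score (earlier in this diff) keep their own page_is_ram() while everyone else falls back to this generic walk_system_ram_range() version: any strong definition overrides the weak one at link time. A minimal standalone illustration of that linker behaviour (ordinary userspace C, not kernel code; the file names and trivial logic are invented):

    /* weak_demo.c -- build alone:       gcc weak_demo.c -o demo
     * with an override:  gcc weak_demo.c override.c -o demo  */
    #include <stdio.h>

    /* generic fallback, analogous to the __weak page_is_ram() above */
    __attribute__((weak)) int page_is_ram(unsigned long pfn)
    {
            printf("generic page_is_ram(%lu)\n", pfn);
            return 0;
    }

    int main(void)
    {
            printf("page_is_ram(1) = %d\n", page_is_ram(1));
            return 0;
    }

    /* override.c -- a strong definition, standing in for an architecture's
     * own copy; when linked in, it silently replaces the weak fallback */
    int page_is_ram(unsigned long pfn)
    {
            return 1;
    }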