Commit 919ee677b656c52c5f86d3d916786891220d5452
1 parent 1f261ef53b
Exists in master and in 7 other branches
[SPARC64]: Add NUMA support.
Currently there is only code to parse NUMA attributes on sun4v/niagara systems, but later on we will add such parsing for older systems.

Signed-off-by: David S. Miller <davem@davemloft.net>
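The patch also wires up a "numa=" early boot parameter, handled by early_numa() in the arch/sparc64/mm/init.c hunk below. That code matches flags with strstr(), so judging from it the accepted forms would be:

    numa=off        disable NUMA parsing (sets numa_enabled = 0)
    numa=debug      enable the numadbg() KERN_INFO diagnostics
    numa=off,debug  both at once, since each flag is a substring match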
Showing 7 changed files with 881 additions and 138 deletions
Makefile
arch/sparc64/Kconfig
... | ... | @@ -250,6 +250,26 @@ |
250 | 250 | |
251 | 251 | endmenu |
252 | 252 | |
253 | +config NUMA | |
254 | + bool "NUMA support" | |
255 | + | |
256 | +config NODES_SHIFT | |
257 | + int | |
258 | + default "4" | |
259 | + depends on NEED_MULTIPLE_NODES | |
260 | + | |
261 | +# Some NUMA nodes have memory ranges that span | |
262 | +# other nodes. Even though a pfn is valid and | |
263 | +# between a node's start and end pfns, it may not | |
264 | +# reside on that node. See memmap_init_zone() | |
265 | +# for details. | |
266 | +config NODES_SPAN_OTHER_NODES | |
267 | + def_bool y | |
268 | + depends on NEED_MULTIPLE_NODES | |
269 | + | |
270 | +config ARCH_POPULATES_NODE_MAP | |
271 | + def_bool y | |
272 | + | |
253 | 273 | config ARCH_SELECT_MEMORY_MODEL |
254 | 274 | def_bool y |
255 | 275 |
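To actually turn this on, the resulting fragment of a generated .config would look roughly like the following (a sketch: CONFIG_NEED_MULTIPLE_NODES comes from the generic mm/Kconfig once NUMA is selected, at which point NODES_SHIFT takes its default of 4, i.e. at most 16 nodes):

    CONFIG_NUMA=y
    CONFIG_NODES_SHIFT=4
    CONFIG_NODES_SPAN_OTHER_NODES=y
    CONFIG_ARCH_POPULATES_NODE_MAP=y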
arch/sparc64/defconfig
1 | 1 | # |
2 | 2 | # Automatically generated make config: don't edit |
3 | -# Linux kernel version: 2.6.25 | |
4 | -# Sun Apr 20 01:33:21 2008 | |
3 | +# Linux kernel version: 2.6.25-numa | |
4 | +# Wed Apr 23 04:49:08 2008 | |
5 | 5 | # |
6 | 6 | CONFIG_SPARC=y |
7 | 7 | CONFIG_SPARC64=y |
... | ... | @@ -152,6 +152,8 @@ |
152 | 152 | CONFIG_HUGETLB_PAGE_SIZE_4MB=y |
153 | 153 | # CONFIG_HUGETLB_PAGE_SIZE_512K is not set |
154 | 154 | # CONFIG_HUGETLB_PAGE_SIZE_64K is not set |
155 | +# CONFIG_NUMA is not set | |
156 | +CONFIG_ARCH_POPULATES_NODE_MAP=y | |
155 | 157 | CONFIG_ARCH_SELECT_MEMORY_MODEL=y |
156 | 158 | CONFIG_ARCH_SPARSEMEM_ENABLE=y |
157 | 159 | CONFIG_ARCH_SPARSEMEM_DEFAULT=y |
... | ... | @@ -787,7 +789,6 @@ |
787 | 789 | # CONFIG_SENSORS_PCF8574 is not set |
788 | 790 | # CONFIG_PCF8575 is not set |
789 | 791 | # CONFIG_SENSORS_PCF8591 is not set |
790 | -# CONFIG_TPS65010 is not set | |
791 | 792 | # CONFIG_SENSORS_MAX6875 is not set |
792 | 793 | # CONFIG_SENSORS_TSL2550 is not set |
793 | 794 | # CONFIG_I2C_DEBUG_CORE is not set |
... | ... | @@ -869,6 +870,7 @@ |
869 | 870 | # Multifunction device drivers |
870 | 871 | # |
871 | 872 | # CONFIG_MFD_SM501 is not set |
873 | +# CONFIG_HTC_PASIC3 is not set | |
872 | 874 | |
873 | 875 | # |
874 | 876 | # Multimedia devices |
... | ... | @@ -1219,10 +1221,6 @@ |
1219 | 1221 | # CONFIG_NEW_LEDS is not set |
1220 | 1222 | # CONFIG_INFINIBAND is not set |
1221 | 1223 | # CONFIG_RTC_CLASS is not set |
1222 | - | |
1223 | -# | |
1224 | -# Userspace I/O | |
1225 | -# | |
1226 | 1224 | # CONFIG_UIO is not set |
1227 | 1225 | |
1228 | 1226 | # |
... | ... | @@ -1399,6 +1397,7 @@ |
1399 | 1397 | CONFIG_DEBUG_BUGVERBOSE=y |
1400 | 1398 | # CONFIG_DEBUG_INFO is not set |
1401 | 1399 | # CONFIG_DEBUG_VM is not set |
1400 | +# CONFIG_DEBUG_WRITECOUNT is not set | |
1402 | 1401 | # CONFIG_DEBUG_LIST is not set |
1403 | 1402 | # CONFIG_DEBUG_SG is not set |
1404 | 1403 | # CONFIG_BOOT_PRINTK_DELAY is not set |
... | ... | @@ -1425,53 +1424,82 @@ |
1425 | 1424 | CONFIG_ASYNC_MEMCPY=m |
1426 | 1425 | CONFIG_ASYNC_XOR=m |
1427 | 1426 | CONFIG_CRYPTO=y |
1427 | + | |
1428 | +# | |
1429 | +# Crypto core or helper | |
1430 | +# | |
1428 | 1431 | CONFIG_CRYPTO_ALGAPI=y |
1429 | 1432 | CONFIG_CRYPTO_AEAD=y |
1430 | 1433 | CONFIG_CRYPTO_BLKCIPHER=y |
1431 | -# CONFIG_CRYPTO_SEQIV is not set | |
1432 | 1434 | CONFIG_CRYPTO_HASH=y |
1433 | 1435 | CONFIG_CRYPTO_MANAGER=y |
1436 | +CONFIG_CRYPTO_GF128MUL=m | |
1437 | +CONFIG_CRYPTO_NULL=m | |
1438 | +# CONFIG_CRYPTO_CRYPTD is not set | |
1439 | +CONFIG_CRYPTO_AUTHENC=y | |
1440 | +CONFIG_CRYPTO_TEST=m | |
1441 | + | |
1442 | +# | |
1443 | +# Authenticated Encryption with Associated Data | |
1444 | +# | |
1445 | +# CONFIG_CRYPTO_CCM is not set | |
1446 | +# CONFIG_CRYPTO_GCM is not set | |
1447 | +# CONFIG_CRYPTO_SEQIV is not set | |
1448 | + | |
1449 | +# | |
1450 | +# Block modes | |
1451 | +# | |
1452 | +CONFIG_CRYPTO_CBC=y | |
1453 | +# CONFIG_CRYPTO_CTR is not set | |
1454 | +# CONFIG_CRYPTO_CTS is not set | |
1455 | +CONFIG_CRYPTO_ECB=m | |
1456 | +CONFIG_CRYPTO_LRW=m | |
1457 | +CONFIG_CRYPTO_PCBC=m | |
1458 | +CONFIG_CRYPTO_XTS=m | |
1459 | + | |
1460 | +# | |
1461 | +# Hash modes | |
1462 | +# | |
1434 | 1463 | CONFIG_CRYPTO_HMAC=y |
1435 | 1464 | CONFIG_CRYPTO_XCBC=y |
1436 | -CONFIG_CRYPTO_NULL=m | |
1465 | + | |
1466 | +# | |
1467 | +# Digest | |
1468 | +# | |
1469 | +CONFIG_CRYPTO_CRC32C=m | |
1437 | 1470 | CONFIG_CRYPTO_MD4=y |
1438 | 1471 | CONFIG_CRYPTO_MD5=y |
1472 | +CONFIG_CRYPTO_MICHAEL_MIC=m | |
1439 | 1473 | CONFIG_CRYPTO_SHA1=y |
1440 | 1474 | CONFIG_CRYPTO_SHA256=m |
1441 | 1475 | CONFIG_CRYPTO_SHA512=m |
1442 | -CONFIG_CRYPTO_WP512=m | |
1443 | 1476 | CONFIG_CRYPTO_TGR192=m |
1444 | -CONFIG_CRYPTO_GF128MUL=m | |
1445 | -CONFIG_CRYPTO_ECB=m | |
1446 | -CONFIG_CRYPTO_CBC=y | |
1447 | -CONFIG_CRYPTO_PCBC=m | |
1448 | -CONFIG_CRYPTO_LRW=m | |
1449 | -CONFIG_CRYPTO_XTS=m | |
1450 | -# CONFIG_CRYPTO_CTR is not set | |
1451 | -# CONFIG_CRYPTO_GCM is not set | |
1452 | -# CONFIG_CRYPTO_CCM is not set | |
1453 | -# CONFIG_CRYPTO_CRYPTD is not set | |
1454 | -CONFIG_CRYPTO_DES=y | |
1455 | -CONFIG_CRYPTO_FCRYPT=m | |
1456 | -CONFIG_CRYPTO_BLOWFISH=m | |
1457 | -CONFIG_CRYPTO_TWOFISH=m | |
1458 | -CONFIG_CRYPTO_TWOFISH_COMMON=m | |
1459 | -CONFIG_CRYPTO_SERPENT=m | |
1477 | +CONFIG_CRYPTO_WP512=m | |
1478 | + | |
1479 | +# | |
1480 | +# Ciphers | |
1481 | +# | |
1460 | 1482 | CONFIG_CRYPTO_AES=m |
1483 | +CONFIG_CRYPTO_ANUBIS=m | |
1484 | +CONFIG_CRYPTO_ARC4=m | |
1485 | +CONFIG_CRYPTO_BLOWFISH=m | |
1486 | +CONFIG_CRYPTO_CAMELLIA=m | |
1461 | 1487 | CONFIG_CRYPTO_CAST5=m |
1462 | 1488 | CONFIG_CRYPTO_CAST6=m |
1463 | -CONFIG_CRYPTO_TEA=m | |
1464 | -CONFIG_CRYPTO_ARC4=m | |
1489 | +CONFIG_CRYPTO_DES=y | |
1490 | +CONFIG_CRYPTO_FCRYPT=m | |
1465 | 1491 | CONFIG_CRYPTO_KHAZAD=m |
1466 | -CONFIG_CRYPTO_ANUBIS=m | |
1467 | -CONFIG_CRYPTO_SEED=m | |
1468 | 1492 | # CONFIG_CRYPTO_SALSA20 is not set |
1493 | +CONFIG_CRYPTO_SEED=m | |
1494 | +CONFIG_CRYPTO_SERPENT=m | |
1495 | +CONFIG_CRYPTO_TEA=m | |
1496 | +CONFIG_CRYPTO_TWOFISH=m | |
1497 | +CONFIG_CRYPTO_TWOFISH_COMMON=m | |
1498 | + | |
1499 | +# | |
1500 | +# Compression | |
1501 | +# | |
1469 | 1502 | CONFIG_CRYPTO_DEFLATE=y |
1470 | -CONFIG_CRYPTO_MICHAEL_MIC=m | |
1471 | -CONFIG_CRYPTO_CRC32C=m | |
1472 | -CONFIG_CRYPTO_CAMELLIA=m | |
1473 | -CONFIG_CRYPTO_TEST=m | |
1474 | -CONFIG_CRYPTO_AUTHENC=y | |
1475 | 1503 | # CONFIG_CRYPTO_LZO is not set |
1476 | 1504 | CONFIG_CRYPTO_HW=y |
1477 | 1505 | # CONFIG_CRYPTO_DEV_HIFN_795X is not set |
... | ... | @@ -1492,4 +1520,5 @@ |
1492 | 1520 | CONFIG_HAS_IOMEM=y |
1493 | 1521 | CONFIG_HAS_IOPORT=y |
1494 | 1522 | CONFIG_HAS_DMA=y |
1523 | +CONFIG_HAVE_LMB=y |
arch/sparc64/kernel/sysfs.c
... | ... | @@ -273,9 +273,21 @@ |
273 | 273 | mmu_stats_supported = 1; |
274 | 274 | } |
275 | 275 | |
276 | +static void register_nodes(void) | |
277 | +{ | |
278 | +#ifdef CONFIG_NUMA | |
279 | + int i; | |
280 | + | |
281 | + for (i = 0; i < MAX_NUMNODES; i++) | |
282 | + register_one_node(i); | |
283 | +#endif | |
284 | +} | |
285 | + | |
276 | 286 | static int __init topology_init(void) |
277 | 287 | { |
278 | 288 | int cpu; |
289 | + | |
290 | + register_nodes(); | |
279 | 291 | |
280 | 292 | check_mmu_stats(); |
281 | 293 |
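A note on the register_nodes() loop above: register_one_node() is the generic driver-core helper from drivers/base/node.c, and it only registers nodes that are actually online (it checks node_online()), so iterating over every MAX_NUMNODES slot is harmless. On a booted two-node machine the result should be sysfs directories along the lines of:

    /sys/devices/system/node/node0/{cpumap,meminfo,numastat,...}
    /sys/devices/system/node/node1/{cpumap,meminfo,numastat,...}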
arch/sparc64/mm/init.c
... | ... | @@ -25,6 +25,7 @@ |
25 | 25 | #include <linux/sort.h> |
26 | 26 | #include <linux/percpu.h> |
27 | 27 | #include <linux/lmb.h> |
28 | +#include <linux/mmzone.h> | |
28 | 29 | |
29 | 30 | #include <asm/head.h> |
30 | 31 | #include <asm/system.h> |
31 | 32 | |
... | ... | @@ -73,9 +74,7 @@ |
73 | 74 | #define MAX_BANKS 32 |
74 | 75 | |
75 | 76 | static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; |
76 | -static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; | |
77 | 77 | static int pavail_ents __initdata; |
78 | -static int pavail_rescan_ents __initdata; | |
79 | 78 | |
80 | 79 | static int cmp_p64(const void *a, const void *b) |
81 | 80 | { |
... | ... | @@ -716,20 +715,29 @@ |
716 | 715 | smp_new_mmu_context_version(); |
717 | 716 | } |
718 | 717 | |
719 | -/* Find a free area for the bootmem map, avoiding the kernel image | |
720 | - * and the initial ramdisk. | |
721 | - */ | |
722 | -static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, | |
723 | - unsigned long end_pfn) | |
718 | +static int numa_enabled = 1; | |
719 | +static int numa_debug; | |
720 | + | |
721 | +static int __init early_numa(char *p) | |
724 | 722 | { |
725 | - unsigned long bootmap_size; | |
723 | + if (!p) | |
724 | + return 0; | |
726 | 725 | |
727 | - bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn); | |
728 | - bootmap_size <<= PAGE_SHIFT; | |
726 | + if (strstr(p, "off")) | |
727 | + numa_enabled = 0; | |
729 | 728 | |
730 | - return lmb_alloc(bootmap_size, PAGE_SIZE) >> PAGE_SHIFT; | |
729 | + if (strstr(p, "debug")) | |
730 | + numa_debug = 1; | |
731 | + | |
732 | + return 0; | |
731 | 733 | } |
734 | +early_param("numa", early_numa); | |
732 | 735 | |
736 | +#define numadbg(f, a...) \ | |
737 | +do { if (numa_debug) \ | |
738 | + printk(KERN_INFO f, ## a); \ | |
739 | +} while (0) | |
740 | + | |
733 | 741 | static void __init find_ramdisk(unsigned long phys_base) |
734 | 742 | { |
735 | 743 | #ifdef CONFIG_BLK_DEV_INITRD |
... | ... | @@ -755,6 +763,9 @@ |
755 | 763 | ramdisk_image -= KERNBASE; |
756 | 764 | ramdisk_image += phys_base; |
757 | 765 | |
766 | + numadbg("Found ramdisk at physical address 0x%lx, size %u\n", | |
767 | + ramdisk_image, sparc_ramdisk_size); | |
768 | + | |
758 | 769 | initrd_start = ramdisk_image; |
759 | 770 | initrd_end = ramdisk_image + sparc_ramdisk_size; |
760 | 771 | |
... | ... | @@ -763,61 +774,626 @@ |
763 | 774 | #endif |
764 | 775 | } |
765 | 776 | |
766 | -/* About pages_avail, this is the value we will use to calculate | |
767 | - * the zholes_size[] argument given to free_area_init_node(). The | |
768 | - * page allocator uses this to calculate nr_kernel_pages, | |
769 | - * nr_all_pages and zone->present_pages. On NUMA it is used | |
770 | - * to calculate zone->min_unmapped_pages and zone->min_slab_pages. | |
771 | - * | |
772 | - * So this number should really be set to what the page allocator | |
773 | - * actually ends up with. This means: | |
774 | - * 1) It should include bootmem map pages, we'll release those. | |
775 | - * 2) It should not include the kernel image, except for the | |
776 | - * __init sections which we will also release. | |
777 | - * 3) It should include the initrd image, since we'll release | |
778 | - * that too. | |
777 | +struct node_mem_mask { | |
778 | + unsigned long mask; | |
779 | + unsigned long val; | |
780 | + unsigned long bootmem_paddr; | |
781 | +}; | |
782 | +static struct node_mem_mask node_masks[MAX_NUMNODES]; | |
783 | +static int num_node_masks; | |
784 | + | |
785 | +int numa_cpu_lookup_table[NR_CPUS]; | |
786 | +cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; | |
787 | + | |
788 | +#ifdef CONFIG_NEED_MULTIPLE_NODES | |
789 | +static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; | |
790 | + | |
791 | +struct mdesc_mblock { | |
792 | + u64 base; | |
793 | + u64 size; | |
794 | + u64 offset; /* RA-to-PA */ | |
795 | +}; | |
796 | +static struct mdesc_mblock *mblocks; | |
797 | +static int num_mblocks; | |
798 | + | |
799 | +static unsigned long ra_to_pa(unsigned long addr) | |
800 | +{ | |
801 | + int i; | |
802 | + | |
803 | + for (i = 0; i < num_mblocks; i++) { | |
804 | + struct mdesc_mblock *m = &mblocks[i]; | |
805 | + | |
806 | + if (addr >= m->base && | |
807 | + addr < (m->base + m->size)) { | |
808 | + addr += m->offset; | |
809 | + break; | |
810 | + } | |
811 | + } | |
812 | + return addr; | |
813 | +} | |
814 | + | |
815 | +static int find_node(unsigned long addr) | |
816 | +{ | |
817 | + int i; | |
818 | + | |
819 | + addr = ra_to_pa(addr); | |
820 | + for (i = 0; i < num_node_masks; i++) { | |
821 | + struct node_mem_mask *p = &node_masks[i]; | |
822 | + | |
823 | + if ((addr & p->mask) == p->val) | |
824 | + return i; | |
825 | + } | |
826 | + return -1; | |
827 | +} | |
828 | + | |
829 | +static unsigned long nid_range(unsigned long start, unsigned long end, | |
830 | + int *nid) | |
831 | +{ | |
832 | + *nid = find_node(start); | |
833 | + start += PAGE_SIZE; | |
834 | + while (start < end) { | |
835 | + int n = find_node(start); | |
836 | + | |
837 | + if (n != *nid) | |
838 | + break; | |
839 | + start += PAGE_SIZE; | |
840 | + } | |
841 | + | |
842 | + return start; | |
843 | +} | |
844 | +#else | |
845 | +static unsigned long nid_range(unsigned long start, unsigned long end, | |
846 | + int *nid) | |
847 | +{ | |
848 | + *nid = 0; | |
849 | + return end; | |
850 | +} | |
851 | +#endif | |
852 | + | |
853 | +/* This must be invoked after performing all of the necessary | |
854 | + * add_active_range() calls for 'nid'. We need to be able to get | |
855 | + * correct data from get_pfn_range_for_nid(). | |
779 | 856 | */ |
780 | -static unsigned long __init bootmem_init(unsigned long *pages_avail, | |
781 | - unsigned long phys_base) | |
857 | +static void __init allocate_node_data(int nid) | |
782 | 858 | { |
783 | - unsigned long end_pfn; | |
859 | + unsigned long paddr, num_pages, start_pfn, end_pfn; | |
860 | + struct pglist_data *p; | |
861 | + | |
862 | +#ifdef CONFIG_NEED_MULTIPLE_NODES | |
863 | + paddr = lmb_alloc_nid(sizeof(struct pglist_data), | |
864 | + SMP_CACHE_BYTES, nid, nid_range); | |
865 | + if (!paddr) { | |
866 | + prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); | |
867 | + prom_halt(); | |
868 | + } | |
869 | + NODE_DATA(nid) = __va(paddr); | |
870 | + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | |
871 | + | |
872 | + NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; | |
873 | +#endif | |
874 | + | |
875 | + p = NODE_DATA(nid); | |
876 | + | |
877 | + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); | |
878 | + p->node_start_pfn = start_pfn; | |
879 | + p->node_spanned_pages = end_pfn - start_pfn; | |
880 | + | |
881 | + if (p->node_spanned_pages) { | |
882 | + num_pages = bootmem_bootmap_pages(p->node_spanned_pages); | |
883 | + | |
884 | + paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, | |
885 | + nid_range); | |
886 | + if (!paddr) { | |
887 | + prom_printf("Cannot allocate bootmap for nid[%d]\n", | |
888 | + nid); | |
889 | + prom_halt(); | |
890 | + } | |
891 | + node_masks[nid].bootmem_paddr = paddr; | |
892 | + } | |
893 | +} | |
894 | + | |
895 | +static void init_node_masks_nonnuma(void) | |
896 | +{ | |
784 | 897 | int i; |
785 | 898 | |
786 | - *pages_avail = lmb_phys_mem_size() >> PAGE_SHIFT; | |
787 | - end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; | |
899 | + numadbg("Initializing tables for non-numa.\n"); | |
788 | 900 | |
789 | - /* Initialize the boot-time allocator. */ | |
790 | - max_pfn = max_low_pfn = end_pfn; | |
791 | - min_low_pfn = (phys_base >> PAGE_SHIFT); | |
901 | + node_masks[0].mask = node_masks[0].val = 0; | |
902 | + num_node_masks = 1; | |
792 | 903 | |
793 | - init_bootmem_node(NODE_DATA(0), | |
794 | - choose_bootmap_pfn(min_low_pfn, end_pfn), | |
795 | - min_low_pfn, end_pfn); | |
904 | + for (i = 0; i < NR_CPUS; i++) | |
905 | + numa_cpu_lookup_table[i] = 0; | |
796 | 906 | |
797 | - /* Now register the available physical memory with the | |
798 | - * allocator. | |
799 | - */ | |
800 | - for (i = 0; i < lmb.memory.cnt; i++) | |
801 | - free_bootmem(lmb.memory.region[i].base, | |
802 | - lmb_size_bytes(&lmb.memory, i)); | |
907 | + numa_cpumask_lookup_table[0] = CPU_MASK_ALL; | |
908 | +} | |
803 | 909 | |
804 | - for (i = 0; i < lmb.reserved.cnt; i++) | |
805 | - reserve_bootmem(lmb.reserved.region[i].base, | |
806 | - lmb_size_bytes(&lmb.reserved, i), | |
807 | - BOOTMEM_DEFAULT); | |
910 | +#ifdef CONFIG_NEED_MULTIPLE_NODES | |
911 | +struct pglist_data *node_data[MAX_NUMNODES]; | |
808 | 912 | |
809 | - *pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT; | |
913 | +EXPORT_SYMBOL(numa_cpu_lookup_table); | |
914 | +EXPORT_SYMBOL(numa_cpumask_lookup_table); | |
915 | +EXPORT_SYMBOL(node_data); | |
810 | 916 | |
811 | - for (i = 0; i < lmb.memory.cnt; ++i) { | |
812 | - unsigned long start_pfn, end_pfn, pages; | |
917 | +struct mdesc_mlgroup { | |
918 | + u64 node; | |
919 | + u64 latency; | |
920 | + u64 match; | |
921 | + u64 mask; | |
922 | +}; | |
923 | +static struct mdesc_mlgroup *mlgroups; | |
924 | +static int num_mlgroups; | |
813 | 925 | |
814 | - pages = lmb_size_pages(&lmb.memory, i); | |
926 | +static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio, | |
927 | + u32 cfg_handle) | |
928 | +{ | |
929 | + u64 arc; | |
930 | + | |
931 | + mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) { | |
932 | + u64 target = mdesc_arc_target(md, arc); | |
933 | + const u64 *val; | |
934 | + | |
935 | + val = mdesc_get_property(md, target, | |
936 | + "cfg-handle", NULL); | |
937 | + if (val && *val == cfg_handle) | |
938 | + return 0; | |
939 | + } | |
940 | + return -ENODEV; | |
941 | +} | |
942 | + | |
943 | +static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp, | |
944 | + u32 cfg_handle) | |
945 | +{ | |
946 | + u64 arc, candidate, best_latency = ~(u64)0; | |
947 | + | |
948 | + candidate = MDESC_NODE_NULL; | |
949 | + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { | |
950 | + u64 target = mdesc_arc_target(md, arc); | |
951 | + const char *name = mdesc_node_name(md, target); | |
952 | + const u64 *val; | |
953 | + | |
954 | + if (strcmp(name, "pio-latency-group")) | |
955 | + continue; | |
956 | + | |
957 | + val = mdesc_get_property(md, target, "latency", NULL); | |
958 | + if (!val) | |
959 | + continue; | |
960 | + | |
961 | + if (*val < best_latency) { | |
962 | + candidate = target; | |
963 | + best_latency = *val; | |
964 | + } | |
965 | + } | |
966 | + | |
967 | + if (candidate == MDESC_NODE_NULL) | |
968 | + return -ENODEV; | |
969 | + | |
970 | + return scan_pio_for_cfg_handle(md, candidate, cfg_handle); | |
971 | +} | |
972 | + | |
973 | +int of_node_to_nid(struct device_node *dp) | |
974 | +{ | |
975 | + const struct linux_prom64_registers *regs; | |
976 | + struct mdesc_handle *md; | |
977 | + u32 cfg_handle; | |
978 | + int count, nid; | |
979 | + u64 grp; | |
980 | + | |
981 | + if (!mlgroups) | |
982 | + return -1; | |
983 | + | |
984 | + regs = of_get_property(dp, "reg", NULL); | |
985 | + if (!regs) | |
986 | + return -1; | |
987 | + | |
988 | + cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff; | |
989 | + | |
990 | + md = mdesc_grab(); | |
991 | + | |
992 | + count = 0; | |
993 | + nid = -1; | |
994 | + mdesc_for_each_node_by_name(md, grp, "group") { | |
995 | + if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) { | |
996 | + nid = count; | |
997 | + break; | |
998 | + } | |
999 | + count++; | |
1000 | + } | |
1001 | + | |
1002 | + mdesc_release(md); | |
1003 | + | |
1004 | + return nid; | |
1005 | +} | |
1006 | + | |
1007 | +static void add_node_ranges(void) | |
1008 | +{ | |
1009 | + int i; | |
1010 | + | |
1011 | + for (i = 0; i < lmb.memory.cnt; i++) { | |
1012 | + unsigned long size = lmb_size_bytes(&lmb.memory, i); | |
1013 | + unsigned long start, end; | |
1014 | + | |
1015 | + start = lmb.memory.region[i].base; | |
1016 | + end = start + size; | |
1017 | + while (start < end) { | |
1018 | + unsigned long this_end; | |
1019 | + int nid; | |
1020 | + | |
1021 | + this_end = nid_range(start, end, &nid); | |
1022 | + | |
1023 | + numadbg("Adding active range nid[%d] " | |
1024 | + "start[%lx] end[%lx]\n", | |
1025 | + nid, start, this_end); | |
1026 | + | |
1027 | + add_active_range(nid, | |
1028 | + start >> PAGE_SHIFT, | |
1029 | + this_end >> PAGE_SHIFT); | |
1030 | + | |
1031 | + start = this_end; | |
1032 | + } | |
1033 | + } | |
1034 | +} | |
1035 | + | |
1036 | +static int __init grab_mlgroups(struct mdesc_handle *md) | |
1037 | +{ | |
1038 | + unsigned long paddr; | |
1039 | + int count = 0; | |
1040 | + u64 node; | |
1041 | + | |
1042 | + mdesc_for_each_node_by_name(md, node, "memory-latency-group") | |
1043 | + count++; | |
1044 | + if (!count) | |
1045 | + return -ENOENT; | |
1046 | + | |
1047 | + paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup), | |
1048 | + SMP_CACHE_BYTES); | |
1049 | + if (!paddr) | |
1050 | + return -ENOMEM; | |
1051 | + | |
1052 | + mlgroups = __va(paddr); | |
1053 | + num_mlgroups = count; | |
1054 | + | |
1055 | + count = 0; | |
1056 | + mdesc_for_each_node_by_name(md, node, "memory-latency-group") { | |
1057 | + struct mdesc_mlgroup *m = &mlgroups[count++]; | |
1058 | + const u64 *val; | |
1059 | + | |
1060 | + m->node = node; | |
1061 | + | |
1062 | + val = mdesc_get_property(md, node, "latency", NULL); | |
1063 | + m->latency = *val; | |
1064 | + val = mdesc_get_property(md, node, "address-match", NULL); | |
1065 | + m->match = *val; | |
1066 | + val = mdesc_get_property(md, node, "address-mask", NULL); | |
1067 | + m->mask = *val; | |
1068 | + | |
1069 | + numadbg("MLGROUP[%d]: node[%lx] latency[%lx] " | |
1070 | + "match[%lx] mask[%lx]\n", | |
1071 | + count - 1, m->node, m->latency, m->match, m->mask); | |
1072 | + } | |
1073 | + | |
1074 | + return 0; | |
1075 | +} | |
1076 | + | |
1077 | +static int __init grab_mblocks(struct mdesc_handle *md) | |
1078 | +{ | |
1079 | + unsigned long paddr; | |
1080 | + int count = 0; | |
1081 | + u64 node; | |
1082 | + | |
1083 | + mdesc_for_each_node_by_name(md, node, "mblock") | |
1084 | + count++; | |
1085 | + if (!count) | |
1086 | + return -ENOENT; | |
1087 | + | |
1088 | + paddr = lmb_alloc(count * sizeof(struct mdesc_mblock), | |
1089 | + SMP_CACHE_BYTES); | |
1090 | + if (!paddr) | |
1091 | + return -ENOMEM; | |
1092 | + | |
1093 | + mblocks = __va(paddr); | |
1094 | + num_mblocks = count; | |
1095 | + | |
1096 | + count = 0; | |
1097 | + mdesc_for_each_node_by_name(md, node, "mblock") { | |
1098 | + struct mdesc_mblock *m = &mblocks[count++]; | |
1099 | + const u64 *val; | |
1100 | + | |
1101 | + val = mdesc_get_property(md, node, "base", NULL); | |
1102 | + m->base = *val; | |
1103 | + val = mdesc_get_property(md, node, "size", NULL); | |
1104 | + m->size = *val; | |
1105 | + val = mdesc_get_property(md, node, | |
1106 | + "address-congruence-offset", NULL); | |
1107 | + m->offset = *val; | |
1108 | + | |
1109 | + numadbg("MBLOCK[%d]: base[%lx] size[%lx] offset[%lx]\n", | |
1110 | + count - 1, m->base, m->size, m->offset); | |
1111 | + } | |
1112 | + | |
1113 | + return 0; | |
1114 | +} | |
1115 | + | |
1116 | +static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, | |
1117 | + u64 grp, cpumask_t *mask) | |
1118 | +{ | |
1119 | + u64 arc; | |
1120 | + | |
1121 | + cpus_clear(*mask); | |
1122 | + | |
1123 | + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { | |
1124 | + u64 target = mdesc_arc_target(md, arc); | |
1125 | + const char *name = mdesc_node_name(md, target); | |
1126 | + const u64 *id; | |
1127 | + | |
1128 | + if (strcmp(name, "cpu")) | |
1129 | + continue; | |
1130 | + id = mdesc_get_property(md, target, "id", NULL); | |
1131 | + if (*id < NR_CPUS) | |
1132 | + cpu_set(*id, *mask); | |
1133 | + } | |
1134 | +} | |
1135 | + | |
1136 | +static struct mdesc_mlgroup * __init find_mlgroup(u64 node) | |
1137 | +{ | |
1138 | + int i; | |
1139 | + | |
1140 | + for (i = 0; i < num_mlgroups; i++) { | |
1141 | + struct mdesc_mlgroup *m = &mlgroups[i]; | |
1142 | + if (m->node == node) | |
1143 | + return m; | |
1144 | + } | |
1145 | + return NULL; | |
1146 | +} | |
1147 | + | |
1148 | +static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, | |
1149 | + int index) | |
1150 | +{ | |
1151 | + struct mdesc_mlgroup *candidate = NULL; | |
1152 | + u64 arc, best_latency = ~(u64)0; | |
1153 | + struct node_mem_mask *n; | |
1154 | + | |
1155 | + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { | |
1156 | + u64 target = mdesc_arc_target(md, arc); | |
1157 | + struct mdesc_mlgroup *m = find_mlgroup(target); | |
1158 | + if (!m) | |
1159 | + continue; | |
1160 | + if (m->latency < best_latency) { | |
1161 | + candidate = m; | |
1162 | + best_latency = m->latency; | |
1163 | + } | |
1164 | + } | |
1165 | + if (!candidate) | |
1166 | + return -ENOENT; | |
1167 | + | |
1168 | + if (num_node_masks != index) { | |
1169 | + printk(KERN_ERR "Inconsistent NUMA state, " | |
1170 | + "index[%d] != num_node_masks[%d]\n", | |
1171 | + index, num_node_masks); | |
1172 | + return -EINVAL; | |
1173 | + } | |
1174 | + | |
1175 | + n = &node_masks[num_node_masks++]; | |
1176 | + | |
1177 | + n->mask = candidate->mask; | |
1178 | + n->val = candidate->match; | |
1179 | + | |
1180 | + numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%lx])\n", | |
1181 | + index, n->mask, n->val, candidate->latency); | |
1182 | + | |
1183 | + return 0; | |
1184 | +} | |
1185 | + | |
1186 | +static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, | |
1187 | + int index) | |
1188 | +{ | |
1189 | + cpumask_t mask; | |
1190 | + int cpu; | |
1191 | + | |
1192 | + numa_parse_mdesc_group_cpus(md, grp, &mask); | |
1193 | + | |
1194 | + for_each_cpu_mask(cpu, mask) | |
1195 | + numa_cpu_lookup_table[cpu] = index; | |
1196 | + numa_cpumask_lookup_table[index] = mask; | |
1197 | + | |
1198 | + if (numa_debug) { | |
1199 | + printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); | |
1200 | + for_each_cpu_mask(cpu, mask) | |
1201 | + printk("%d ", cpu); | |
1202 | + printk("]\n"); | |
1203 | + } | |
1204 | + | |
1205 | + return numa_attach_mlgroup(md, grp, index); | |
1206 | +} | |
1207 | + | |
1208 | +static int __init numa_parse_mdesc(void) | |
1209 | +{ | |
1210 | + struct mdesc_handle *md = mdesc_grab(); | |
1211 | + int i, err, count; | |
1212 | + u64 node; | |
1213 | + | |
1214 | + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); | |
1215 | + if (node == MDESC_NODE_NULL) { | |
1216 | + mdesc_release(md); | |
1217 | + return -ENOENT; | |
1218 | + } | |
1219 | + | |
1220 | + err = grab_mblocks(md); | |
1221 | + if (err < 0) | |
1222 | + goto out; | |
1223 | + | |
1224 | + err = grab_mlgroups(md); | |
1225 | + if (err < 0) | |
1226 | + goto out; | |
1227 | + | |
1228 | + count = 0; | |
1229 | + mdesc_for_each_node_by_name(md, node, "group") { | |
1230 | + err = numa_parse_mdesc_group(md, node, count); | |
1231 | + if (err < 0) | |
1232 | + break; | |
1233 | + count++; | |
1234 | + } | |
1235 | + | |
1236 | + add_node_ranges(); | |
1237 | + | |
1238 | + for (i = 0; i < num_node_masks; i++) { | |
1239 | + allocate_node_data(i); | |
1240 | + node_set_online(i); | |
1241 | + } | |
1242 | + | |
1243 | + err = 0; | |
1244 | +out: | |
1245 | + mdesc_release(md); | |
1246 | + return err; | |
1247 | +} | |
1248 | + | |
1249 | +static int __init numa_parse_sun4u(void) | |
1250 | +{ | |
1251 | + return -1; | |
1252 | +} | |
1253 | + | |
1254 | +static int __init bootmem_init_numa(void) | |
1255 | +{ | |
1256 | + int err = -1; | |
1257 | + | |
1258 | + numadbg("bootmem_init_numa()\n"); | |
1259 | + | |
1260 | + if (numa_enabled) { | |
1261 | + if (tlb_type == hypervisor) | |
1262 | + err = numa_parse_mdesc(); | |
1263 | + else | |
1264 | + err = numa_parse_sun4u(); | |
1265 | + } | |
1266 | + return err; | |
1267 | +} | |
1268 | + | |
1269 | +#else | |
1270 | + | |
1271 | +static int bootmem_init_numa(void) | |
1272 | +{ | |
1273 | + return -1; | |
1274 | +} | |
1275 | + | |
1276 | +#endif | |
1277 | + | |
1278 | +static void __init bootmem_init_nonnuma(void) | |
1279 | +{ | |
1280 | + unsigned long top_of_ram = lmb_end_of_DRAM(); | |
1281 | + unsigned long total_ram = lmb_phys_mem_size(); | |
1282 | + unsigned int i; | |
1283 | + | |
1284 | + numadbg("bootmem_init_nonnuma()\n"); | |
1285 | + | |
1286 | + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", | |
1287 | + top_of_ram, total_ram); | |
1288 | + printk(KERN_INFO "Memory hole size: %ldMB\n", | |
1289 | + (top_of_ram - total_ram) >> 20); | |
1290 | + | |
1291 | + init_node_masks_nonnuma(); | |
1292 | + | |
1293 | + for (i = 0; i < lmb.memory.cnt; i++) { | |
1294 | + unsigned long size = lmb_size_bytes(&lmb.memory, i); | |
1295 | + unsigned long start_pfn, end_pfn; | |
1296 | + | |
1297 | + if (!size) | |
1298 | + continue; | |
1299 | + | |
815 | 1300 | start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; |
816 | - end_pfn = start_pfn + pages; | |
1301 | + end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); | |
1302 | + add_active_range(0, start_pfn, end_pfn); | |
1303 | + } | |
817 | 1304 | |
818 | - memory_present(0, start_pfn, end_pfn); | |
1305 | + allocate_node_data(0); | |
1306 | + | |
1307 | + node_set_online(0); | |
1308 | +} | |
1309 | + | |
1310 | +static void __init reserve_range_in_node(int nid, unsigned long start, | |
1311 | + unsigned long end) | |
1312 | +{ | |
1313 | + numadbg(" reserve_range_in_node(nid[%d],start[%lx],end[%lx]\n", | |
1314 | + nid, start, end); | |
1315 | + while (start < end) { | |
1316 | + unsigned long this_end; | |
1317 | + int n; | |
1318 | + | |
1319 | + this_end = nid_range(start, end, &n); | |
1320 | + if (n == nid) { | |
1321 | + numadbg(" MATCH reserving range [%lx:%lx]\n", | |
1322 | + start, this_end); | |
1323 | + reserve_bootmem_node(NODE_DATA(nid), start, | |
1324 | + (this_end - start), BOOTMEM_DEFAULT); | |
1325 | + } else | |
1326 | + numadbg(" NO MATCH, advancing start to %lx\n", | |
1327 | + this_end); | |
1328 | + | |
1329 | + start = this_end; | |
819 | 1330 | } |
1331 | +} | |
820 | 1332 | |
1333 | +static void __init trim_reserved_in_node(int nid) | |
1334 | +{ | |
1335 | + int i; | |
1336 | + | |
1337 | + numadbg(" trim_reserved_in_node(%d)\n", nid); | |
1338 | + | |
1339 | + for (i = 0; i < lmb.reserved.cnt; i++) { | |
1340 | + unsigned long start = lmb.reserved.region[i].base; | |
1341 | + unsigned long size = lmb_size_bytes(&lmb.reserved, i); | |
1342 | + unsigned long end = start + size; | |
1343 | + | |
1344 | + reserve_range_in_node(nid, start, end); | |
1345 | + } | |
1346 | +} | |
1347 | + | |
1348 | +static void __init bootmem_init_one_node(int nid) | |
1349 | +{ | |
1350 | + struct pglist_data *p; | |
1351 | + | |
1352 | + numadbg("bootmem_init_one_node(%d)\n", nid); | |
1353 | + | |
1354 | + p = NODE_DATA(nid); | |
1355 | + | |
1356 | + if (p->node_spanned_pages) { | |
1357 | + unsigned long paddr = node_masks[nid].bootmem_paddr; | |
1358 | + unsigned long end_pfn; | |
1359 | + | |
1360 | + end_pfn = p->node_start_pfn + p->node_spanned_pages; | |
1361 | + | |
1362 | + numadbg(" init_bootmem_node(%d, %lx, %lx, %lx)\n", | |
1363 | + nid, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); | |
1364 | + | |
1365 | + init_bootmem_node(p, paddr >> PAGE_SHIFT, | |
1366 | + p->node_start_pfn, end_pfn); | |
1367 | + | |
1368 | + numadbg(" free_bootmem_with_active_regions(%d, %lx)\n", | |
1369 | + nid, end_pfn); | |
1370 | + free_bootmem_with_active_regions(nid, end_pfn); | |
1371 | + | |
1372 | + trim_reserved_in_node(nid); | |
1373 | + | |
1374 | + numadbg(" sparse_memory_present_with_active_regions(%d)\n", | |
1375 | + nid); | |
1376 | + sparse_memory_present_with_active_regions(nid); | |
1377 | + } | |
1378 | +} | |
1379 | + | |
1380 | +static unsigned long __init bootmem_init(unsigned long phys_base) | |
1381 | +{ | |
1382 | + unsigned long end_pfn; | |
1383 | + int nid; | |
1384 | + | |
1385 | + end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; | |
1386 | + max_pfn = max_low_pfn = end_pfn; | |
1387 | + min_low_pfn = (phys_base >> PAGE_SHIFT); | |
1388 | + | |
1389 | + if (bootmem_init_numa() < 0) | |
1390 | + bootmem_init_nonnuma(); | |
1391 | + | |
1392 | + /* XXX cpu notifier XXX */ | |
1393 | + | |
1394 | + for_each_online_node(nid) | |
1395 | + bootmem_init_one_node(nid); | |
1396 | + | |
821 | 1397 | sparse_init(); |
822 | 1398 | |
823 | 1399 | return end_pfn; |
... | ... | @@ -1112,7 +1688,7 @@ |
1112 | 1688 | |
1113 | 1689 | void __init paging_init(void) |
1114 | 1690 | { |
1115 | - unsigned long end_pfn, pages_avail, shift, phys_base; | |
1691 | + unsigned long end_pfn, shift, phys_base; | |
1116 | 1692 | unsigned long real_end, i; |
1117 | 1693 | |
1118 | 1694 | /* These build time checkes make sure that the dcache_dirty_cpu() |
... | ... | @@ -1220,27 +1796,21 @@ |
1220 | 1796 | sun4v_mdesc_init(); |
1221 | 1797 | |
1222 | 1798 | /* Setup bootmem... */ |
1223 | - pages_avail = 0; | |
1224 | - last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); | |
1799 | + last_valid_pfn = end_pfn = bootmem_init(phys_base); | |
1225 | 1800 | |
1801 | +#ifndef CONFIG_NEED_MULTIPLE_NODES | |
1226 | 1802 | max_mapnr = last_valid_pfn; |
1227 | - | |
1803 | +#endif | |
1228 | 1804 | kernel_physical_mapping_init(); |
1229 | 1805 | |
1230 | 1806 | { |
1231 | - unsigned long zones_size[MAX_NR_ZONES]; | |
1232 | - unsigned long zholes_size[MAX_NR_ZONES]; | |
1233 | - int znum; | |
1807 | + unsigned long max_zone_pfns[MAX_NR_ZONES]; | |
1234 | 1808 | |
1235 | - for (znum = 0; znum < MAX_NR_ZONES; znum++) | |
1236 | - zones_size[znum] = zholes_size[znum] = 0; | |
1809 | + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | |
1237 | 1810 | |
1238 | - zones_size[ZONE_NORMAL] = end_pfn; | |
1239 | - zholes_size[ZONE_NORMAL] = end_pfn - pages_avail; | |
1811 | + max_zone_pfns[ZONE_NORMAL] = end_pfn; | |
1240 | 1812 | |
1241 | - free_area_init_node(0, &contig_page_data, zones_size, | |
1242 | - __pa(PAGE_OFFSET) >> PAGE_SHIFT, | |
1243 | - zholes_size); | |
1813 | + free_area_init_nodes(max_zone_pfns); | |
1244 | 1814 | } |
1245 | 1815 | |
1246 | 1816 | printk("Booting Linux...\n"); |
... | ... | @@ -1249,21 +1819,52 @@ |
1249 | 1819 | cpu_probe(); |
1250 | 1820 | } |
1251 | 1821 | |
1252 | -static void __init taint_real_pages(void) | |
1822 | +int __init page_in_phys_avail(unsigned long paddr) | |
1253 | 1823 | { |
1254 | 1824 | int i; |
1255 | 1825 | |
1826 | + paddr &= PAGE_MASK; | |
1827 | + | |
1828 | + for (i = 0; i < pavail_ents; i++) { | |
1829 | + unsigned long start, end; | |
1830 | + | |
1831 | + start = pavail[i].phys_addr; | |
1832 | + end = start + pavail[i].reg_size; | |
1833 | + | |
1834 | + if (paddr >= start && paddr < end) | |
1835 | + return 1; | |
1836 | + } | |
1837 | + if (paddr >= kern_base && paddr < (kern_base + kern_size)) | |
1838 | + return 1; | |
1839 | +#ifdef CONFIG_BLK_DEV_INITRD | |
1840 | + if (paddr >= __pa(initrd_start) && | |
1841 | + paddr < __pa(PAGE_ALIGN(initrd_end))) | |
1842 | + return 1; | |
1843 | +#endif | |
1844 | + | |
1845 | + return 0; | |
1846 | +} | |
1847 | + | |
1848 | +static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; | |
1849 | +static int pavail_rescan_ents __initdata; | |
1850 | + | |
1851 | +/* Certain OBP calls, such as fetching "available" properties, can | |
1852 | + * claim physical memory. So, along with initializing the valid | |
1853 | + * address bitmap, what we do here is refetch the physical available | |
1854 | + * memory list again, and make sure it provides at least as much | |
1855 | + * memory as 'pavail' does. | |
1856 | + */ | |
1857 | +static void setup_valid_addr_bitmap_from_pavail(void) | |
1858 | +{ | |
1859 | + int i; | |
1860 | + | |
1256 | 1861 | read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); |
1257 | 1862 | |
1258 | - /* Find changes discovered in the physmem available rescan and | |
1259 | - * reserve the lost portions in the bootmem maps. | |
1260 | - */ | |
1261 | 1863 | for (i = 0; i < pavail_ents; i++) { |
1262 | 1864 | unsigned long old_start, old_end; |
1263 | 1865 | |
1264 | 1866 | old_start = pavail[i].phys_addr; |
1265 | - old_end = old_start + | |
1266 | - pavail[i].reg_size; | |
1867 | + old_end = old_start + pavail[i].reg_size; | |
1267 | 1868 | while (old_start < old_end) { |
1268 | 1869 | int n; |
1269 | 1870 | |
... | ... | @@ -1281,40 +1882,23 @@ |
1281 | 1882 | goto do_next_page; |
1282 | 1883 | } |
1283 | 1884 | } |
1284 | - reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT); | |
1285 | 1885 | |
1886 | + prom_printf("mem_init: Lost memory in pavail\n"); | |
1887 | + prom_printf("mem_init: OLD start[%lx] size[%lx]\n", | |
1888 | + pavail[i].phys_addr, | |
1889 | + pavail[i].reg_size); | |
1890 | + prom_printf("mem_init: NEW start[%lx] size[%lx]\n", | |
1891 | + pavail_rescan[i].phys_addr, | |
1892 | + pavail_rescan[i].reg_size); | |
1893 | + prom_printf("mem_init: Cannot continue, aborting.\n"); | |
1894 | + prom_halt(); | |
1895 | + | |
1286 | 1896 | do_next_page: |
1287 | 1897 | old_start += PAGE_SIZE; |
1288 | 1898 | } |
1289 | 1899 | } |
1290 | 1900 | } |
1291 | 1901 | |
1292 | -int __init page_in_phys_avail(unsigned long paddr) | |
1293 | -{ | |
1294 | - int i; | |
1295 | - | |
1296 | - paddr &= PAGE_MASK; | |
1297 | - | |
1298 | - for (i = 0; i < pavail_rescan_ents; i++) { | |
1299 | - unsigned long start, end; | |
1300 | - | |
1301 | - start = pavail_rescan[i].phys_addr; | |
1302 | - end = start + pavail_rescan[i].reg_size; | |
1303 | - | |
1304 | - if (paddr >= start && paddr < end) | |
1305 | - return 1; | |
1306 | - } | |
1307 | - if (paddr >= kern_base && paddr < (kern_base + kern_size)) | |
1308 | - return 1; | |
1309 | -#ifdef CONFIG_BLK_DEV_INITRD | |
1310 | - if (paddr >= __pa(initrd_start) && | |
1311 | - paddr < __pa(PAGE_ALIGN(initrd_end))) | |
1312 | - return 1; | |
1313 | -#endif | |
1314 | - | |
1315 | - return 0; | |
1316 | -} | |
1317 | - | |
1318 | 1902 | void __init mem_init(void) |
1319 | 1903 | { |
1320 | 1904 | unsigned long codepages, datapages, initpages; |
... | ... | @@ -1337,14 +1921,26 @@ |
1337 | 1921 | addr += PAGE_SIZE; |
1338 | 1922 | } |
1339 | 1923 | |
1340 | - taint_real_pages(); | |
1924 | + setup_valid_addr_bitmap_from_pavail(); | |
1341 | 1925 | |
1342 | 1926 | high_memory = __va(last_valid_pfn << PAGE_SHIFT); |
1343 | 1927 | |
1928 | +#ifdef CONFIG_NEED_MULTIPLE_NODES | |
1929 | + for_each_online_node(i) { | |
1930 | + if (NODE_DATA(i)->node_spanned_pages != 0) { | |
1931 | + totalram_pages += | |
1932 | + free_all_bootmem_node(NODE_DATA(i)); | |
1933 | + } | |
1934 | + } | |
1935 | +#else | |
1936 | + totalram_pages = free_all_bootmem(); | |
1937 | +#endif | |
1938 | + | |
1344 | 1939 | /* We subtract one to account for the mem_map_zero page |
1345 | 1940 | * allocated below. |
1346 | 1941 | */ |
1347 | - totalram_pages = num_physpages = free_all_bootmem() - 1; | |
1942 | + totalram_pages -= 1; | |
1943 | + num_physpages = totalram_pages; | |
1348 | 1944 | |
1349 | 1945 | /* |
1350 | 1946 | * Set up the zero page, mark it reserved, so that page count |
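The core of the sun4v parsing above is the mask/match scheme: each "memory-latency-group" in the machine description supplies an "address-mask"/"address-match" pair, and a physical address belongs to the node whose pair it satisfies. A small stand-alone sketch of find_node(), with invented values for a hypothetical two-node box where physical address bit 39 picks the node (assumes an LP64 host, as on sparc64):

    #include <stdio.h>

    struct node_mem_mask { unsigned long mask, val; };

    /* Mimics node_masks[] as numa_attach_mlgroup() fills it in from
     * the mdesc "address-mask"/"address-match" properties.  These
     * values are made up purely for illustration. */
    static struct node_mem_mask node_masks[] = {
            { .mask = 0x8000000000UL, .val = 0x0000000000UL }, /* node 0 */
            { .mask = 0x8000000000UL, .val = 0x8000000000UL }, /* node 1 */
    };

    static int find_node(unsigned long addr)
    {
            int i;

            for (i = 0; i < 2; i++) {
                    struct node_mem_mask *p = &node_masks[i];

                    if ((addr & p->mask) == p->val)
                            return i;
            }
            return -1;
    }

    int main(void)
    {
            printf("0x1000000000 -> node %d\n", find_node(0x1000000000UL));
            printf("0x9000000000 -> node %d\n", find_node(0x9000000000UL));
            return 0;
    }

nid_range() then walks a [start, end) range one page at a time and returns the first address whose node differs, which is how add_node_ranges() carves each LMB region into per-node active ranges.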
include/asm-sparc64/mmzone.h
1 | +#ifndef _SPARC64_MMZONE_H | |
2 | +#define _SPARC64_MMZONE_H | |
3 | + | |
4 | +#ifdef CONFIG_NEED_MULTIPLE_NODES | |
5 | + | |
6 | +extern struct pglist_data *node_data[]; | |
7 | + | |
8 | +#define NODE_DATA(nid) (node_data[nid]) | |
9 | +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) | |
10 | +#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) | |
11 | + | |
12 | +extern int numa_cpu_lookup_table[]; | |
13 | +extern cpumask_t numa_cpumask_lookup_table[]; | |
14 | + | |
15 | +#endif /* CONFIG_NEED_MULTIPLE_NODES */ | |
16 | + | |
17 | +#endif /* _SPARC64_MMZONE_H */ |
include/asm-sparc64/topology.h
1 | 1 | #ifndef _ASM_SPARC64_TOPOLOGY_H |
2 | 2 | #define _ASM_SPARC64_TOPOLOGY_H |
3 | 3 | |
4 | +#ifdef CONFIG_NUMA | |
5 | + | |
6 | +#include <asm/mmzone.h> | |
7 | + | |
8 | +static inline int cpu_to_node(int cpu) | |
9 | +{ | |
10 | + return numa_cpu_lookup_table[cpu]; | |
11 | +} | |
12 | + | |
13 | +#define parent_node(node) (node) | |
14 | + | |
15 | +static inline cpumask_t node_to_cpumask(int node) | |
16 | +{ | |
17 | + return numa_cpumask_lookup_table[node]; | |
18 | +} | |
19 | + | |
20 | +/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ | |
21 | +#define node_to_cpumask_ptr(v, node) \ | |
22 | + cpumask_t *v = &(numa_cpumask_lookup_table[node]) | |
23 | + | |
24 | +#define node_to_cpumask_ptr_next(v, node) \ | |
25 | + v = &(numa_cpumask_lookup_table[node]) | |
26 | + | |
27 | +static inline int node_to_first_cpu(int node) | |
28 | +{ | |
29 | + cpumask_t tmp; | |
30 | + tmp = node_to_cpumask(node); | |
31 | + return first_cpu(tmp); | |
32 | +} | |
33 | + | |
34 | +struct pci_bus; | |
35 | +#ifdef CONFIG_PCI | |
36 | +extern int pcibus_to_node(struct pci_bus *pbus); | |
37 | +#else | |
38 | +static inline int pcibus_to_node(struct pci_bus *pbus) | |
39 | +{ | |
40 | + return -1; | |
41 | +} | |
42 | +#endif | |
43 | + | |
44 | +#define pcibus_to_cpumask(bus) \ | |
45 | + (pcibus_to_node(bus) == -1 ? \ | |
46 | + CPU_MASK_ALL : \ | |
47 | + node_to_cpumask(pcibus_to_node(bus))) | |
48 | + | |
49 | +#define SD_NODE_INIT (struct sched_domain) { \ | |
50 | + .min_interval = 8, \ | |
51 | + .max_interval = 32, \ | |
52 | + .busy_factor = 32, \ | |
53 | + .imbalance_pct = 125, \ | |
54 | + .cache_nice_tries = 2, \ | |
55 | + .busy_idx = 3, \ | |
56 | + .idle_idx = 2, \ | |
57 | + .newidle_idx = 0, \ | |
58 | + .wake_idx = 1, \ | |
59 | + .forkexec_idx = 1, \ | |
60 | + .flags = SD_LOAD_BALANCE \ | |
61 | + | SD_BALANCE_FORK \ | |
62 | + | SD_BALANCE_EXEC \ | |
63 | + | SD_SERIALIZE \ | |
64 | + | SD_WAKE_BALANCE, \ | |
65 | + .last_balance = jiffies, \ | |
66 | + .balance_interval = 1, \ | |
67 | +} | |
68 | + | |
69 | +#else /* CONFIG_NUMA */ | |
70 | + | |
71 | +#include <asm-generic/topology.h> | |
72 | + | |
73 | +#endif /* !(CONFIG_NUMA) */ | |
74 | + | |
4 | 75 | #ifdef CONFIG_SMP |
5 | 76 | #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) |
6 | 77 | #define topology_core_id(cpu) (cpu_data(cpu).core_id) |
... | ... | @@ -9,8 +80,6 @@ |
9 | 80 | #define mc_capable() (sparc64_multi_core) |
10 | 81 | #define smt_capable() (sparc64_multi_core) |
11 | 82 | #endif /* CONFIG_SMP */ |
12 | - | |
13 | -#include <asm-generic/topology.h> | |
14 | 83 | |
15 | 84 | #define cpu_coregroup_map(cpu) (cpu_core_map[cpu]) |
16 | 85 |
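With these topology.h definitions in place, generic code can make node-local decisions on sparc64. A hypothetical consumer, sketched for illustration only (kmalloc_node() is the existing generic allocator entry point; struct foo and the function itself are invented):

    /* Allocate a cpu's private state on the node that owns the cpu. */
    static struct foo *alloc_foo_for_cpu(int cpu)
    {
            int nid = cpu_to_node(cpu);     /* numa_cpu_lookup_table[] */
            cpumask_t peers = node_to_cpumask(nid);

            pr_debug("cpu %d sits on node %d with %d sibling cpus\n",
                     cpu, nid, cpus_weight(peers));

            return kmalloc_node(sizeof(struct foo), GFP_KERNEL, nid);
    }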