Commit 20d9207849d5abe60461841b3c3724f6e7c9d33e

Authored by Linus Torvalds

Merge branch 'x86/uv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86/uv' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: UV BAU distribution and payload MMRs
  x86: UV: BAU partition-relative distribution map
  x86, uv: add Kconfig dependency on NUMA for UV systems
  x86: prevent /sys/firmware/sgi_uv from being created on non-uv systems
  x86, UV: Fix for nodes with memory and no cpus
  x86, UV: system table in bios accessed after unmap
  x86: UV BAU messaging timeouts
  x86: UV BAU and nodes with no memory
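
The thread running through the BAU patches above is that the activation descriptor's distribution bitmap becomes partition-relative: a destination is indexed by its pnode minus the partition's base pnode (the lowest pnode that owns cpus), and the descriptor's base_dest_nodeid field carries that base as a nasid, which on UV is the pnode shifted left by one. A minimal standalone sketch of the arithmetic, with invented values (not kernel code):

#include <stdio.h>

int main(void)
{
        /* hypothetical partition whose lowest cpu-owning pnode is 4 */
        int uv_partition_base_pnode = 4;
        int targets[] = { 4, 5, 7 };    /* pnodes to be flushed */
        int base_dest_nodeid = uv_partition_base_pnode << 1; /* nasid */
        int i;

        printf("base_dest_nodeid (nasid) = %d\n", base_dest_nodeid);
        for (i = 0; i < 3; i++)
                printf("pnode %d -> distribution bit %d\n", targets[i],
                       targets[i] - uv_partition_base_pnode);
        return 0;
}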

arch/x86/Kconfig
... ... @@ -353,6 +353,7 @@
353 353 bool "SGI Ultraviolet"
354 354 depends on X86_64
355 355 depends on X86_EXTENDED_PLATFORM
  356 + depends on NUMA
356 357 select X86_X2APIC
357 358 ---help---
358 359 This option is needed in order to support SGI Ultraviolet systems.
arch/x86/include/asm/uv/uv_mmrs.h
... ... @@ -17,6 +17,11 @@
17 17 /* ========================================================================= */
18 18 /* UVH_BAU_DATA_CONFIG */
19 19 /* ========================================================================= */
  20 +#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
  21 +#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
  22 +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
  23 +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL
  24 +/* 1011 timebase 7 (168 millisec) * 3 ticks -> 500ms */
20 25 #define UVH_BAU_DATA_CONFIG 0x61680UL
21 26 #define UVH_BAU_DATA_CONFIG_32 0x0438
22 27  
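
Per the comment above, the period value 0xb (binary 1011) encodes timebase 7, i.e. 168 milliseconds, times 3 ticks, for a roughly 500 ms soft-ack timeout. The sketch below shows how these constants compose into a MISC_CONTROL image; it mirrors the three-step sequence in uv_enable_timeouts() in tlb_uv.c further down, except that the real code writes the MMR after each step, which the hardware requires in order to capture and lock the period (illustration only, not kernel code):

/* constants repeated from the hunk above, for self-containment */
#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT     15
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT  16
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD        0x000000000bUL

static unsigned long set_soft_ack_period(unsigned long mmr_image)
{
        /* step 1: soft-ack mode must be off while the period is set */
        mmr_image &= ~(1UL << UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
        /* step 2: replace the 4-bit timeout period field */
        mmr_image &= ~(0xfUL << UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
        mmr_image |= UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
                        UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT;
        /* step 3: re-enable soft-ack mode, locking the period in */
        mmr_image |= 1UL << UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT;
        return mmr_image;
}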
arch/x86/kernel/apic/x2apic_uv_x.c
... ... @@ -549,7 +549,8 @@
549 549 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
550 550 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
551 551 int max_pnode = 0;
552   - unsigned long mmr_base, present;
  552 + unsigned long mmr_base, present, paddr;
  553 + unsigned short pnode_mask;
553 554  
554 555 map_low_mmrs();
555 556  
... ... @@ -592,6 +593,7 @@
592 593 }
593 594 }
594 595  
  596 + pnode_mask = (1 << n_val) - 1;
595 597 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
596 598 gnode_upper = (((unsigned long)node_id.s.node_id) &
597 599 ~((1 << n_val) - 1)) << m_val;
... ... @@ -615,7 +617,7 @@
615 617 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
616 618 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
617 619 uv_cpu_hub_info(cpu)->pnode = pnode;
618   - uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
  620 + uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
619 621 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
620 622 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
621 623 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
... ... @@ -629,6 +631,16 @@
629 631 "lcpu %d, blade %d\n",
630 632 cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
631 633 lcpu, blade);
  634 + }
  635 +
  636 + /* Add blade/pnode info for nodes without cpus */
  637 + for_each_online_node(nid) {
  638 + if (uv_node_to_blade[nid] >= 0)
  639 + continue;
  640 + paddr = node_start_pfn(nid) << PAGE_SHIFT;
  641 + pnode = (paddr >> m_val) & pnode_mask;
  642 + blade = boot_pnode_to_blade(pnode);
  643 + uv_node_to_blade[nid] = blade;
632 644 }
633 645  
634 646 map_gru_high(max_pnode);
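
The loop just added fills in uv_node_to_blade for nodes that have memory but no cpus: with no cpu to read the blade from, the pnode is derived from the node's starting physical address, shifted down by m_val and masked to n_val bits. A worked example with an assumed geometry (m_val = 37, n_val = 2; values invented for illustration, not real hardware):

#include <stdio.h>

int main(void)
{
        int m_val = 37, n_val = 2;      /* assumed geometry */
        unsigned short pnode_mask = (1 << n_val) - 1;
        /* node_start_pfn(nid) << PAGE_SHIFT for the cpuless node */
        unsigned long paddr = 0x2000000000UL;   /* 128 GB */
        int pnode = (paddr >> m_val) & pnode_mask;

        printf("pnode = %d\n", pnode);  /* prints 1 */
        return 0;
}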
arch/x86/kernel/bios_uv.c
... ... @@ -182,7 +182,8 @@
182 182 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
183 183 iounmap(tab);
184 184  
185   - printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
  185 + printk(KERN_INFO "EFI UV System Table Revision %d\n",
  186 + uv_systab.revision);
186 187 }
187 188 #else /* !CONFIG_EFI */
188 189  
arch/x86/kernel/tlb_uv.c
... ... @@ -25,6 +25,8 @@
25 25  
26 26 /* position of pnode (which is nasid>>1): */
27 27 static int uv_nshift __read_mostly;
  28 +/* base pnode in this partition */
  29 +static int uv_partition_base_pnode __read_mostly;
28 30  
29 31 static unsigned long uv_mmask __read_mostly;
30 32  
... ... @@ -32,6 +34,34 @@
32 34 static DEFINE_PER_CPU(struct bau_control, bau_control);
33 35  
34 36 /*
  37 + * Determine the first node on a blade.
  38 + */
  39 +static int __init blade_to_first_node(int blade)
  40 +{
  41 + int node, b;
  42 +
  43 + for_each_online_node(node) {
  44 + b = uv_node_to_blade_id(node);
  45 + if (blade == b)
  46 + return node;
  47 + }
  48 + return -1; /* shouldn't happen */
  49 +}
  50 +
  51 +/*
  52 + * Determine the apicid of the first cpu on a blade.
  53 + */
  54 +static int __init blade_to_first_apicid(int blade)
  55 +{
  56 + int cpu;
  57 +
  58 + for_each_present_cpu(cpu)
  59 + if (blade == uv_cpu_to_blade_id(cpu))
  60 + return per_cpu(x86_cpu_to_apicid, cpu);
  61 + return -1;
  62 +}
  63 +
  64 +/*
35 65 * Free a software acknowledge hardware resource by clearing its Pending
36 66 * bit. This will return a reply to the sender.
37 67 * If the message has timed out, a reply has already been sent by the
... ... @@ -67,7 +97,7 @@
67 97 msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
68 98 cpu = uv_blade_processor_id();
69 99 msg->number_of_cpus =
70   - uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
  100 + uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
71 101 this_cpu_mask = 1UL << cpu;
72 102 if (msp->seen_by.bits & this_cpu_mask)
73 103 return;
... ... @@ -215,14 +245,14 @@
215 245 * Returns @flush_mask if some remote flushing remains to be done. The
216 246 * mask will have some bits still set.
217 247 */
218   -const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
  248 +const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
219 249 struct bau_desc *bau_desc,
220 250 struct cpumask *flush_mask)
221 251 {
222 252 int completion_status = 0;
223 253 int right_shift;
224 254 int tries = 0;
225   - int blade;
  255 + int pnode;
226 256 int bit;
227 257 unsigned long mmr_offset;
228 258 unsigned long index;
... ... @@ -265,8 +295,8 @@
265 295 * use the IPI method of shootdown on them.
266 296 */
267 297 for_each_cpu(bit, flush_mask) {
268   - blade = uv_cpu_to_blade_id(bit);
269   - if (blade == this_blade)
  298 + pnode = uv_cpu_to_pnode(bit);
  299 + if (pnode == this_pnode)
270 300 continue;
271 301 cpumask_clear_cpu(bit, flush_mask);
272 302 }
... ... @@ -309,16 +339,16 @@
309 339 struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
310 340 int i;
311 341 int bit;
312   - int blade;
  342 + int pnode;
313 343 int uv_cpu;
314   - int this_blade;
  344 + int this_pnode;
315 345 int locals = 0;
316 346 struct bau_desc *bau_desc;
317 347  
318 348 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
319 349  
320 350 uv_cpu = uv_blade_processor_id();
321   - this_blade = uv_numa_blade_id();
  351 + this_pnode = uv_hub_info->pnode;
322 352 bau_desc = __get_cpu_var(bau_control).descriptor_base;
323 353 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
324 354  
... ... @@ -326,13 +356,14 @@
326 356  
327 357 i = 0;
328 358 for_each_cpu(bit, flush_mask) {
329   - blade = uv_cpu_to_blade_id(bit);
330   - BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
331   - if (blade == this_blade) {
  359 + pnode = uv_cpu_to_pnode(bit);
  360 + BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
  361 + if (pnode == this_pnode) {
332 362 locals++;
333 363 continue;
334 364 }
335   - bau_node_set(blade, &bau_desc->distribution);
  365 + bau_node_set(pnode - uv_partition_base_pnode,
  366 + &bau_desc->distribution);
336 367 i++;
337 368 }
338 369 if (i == 0) {
... ... @@ -350,7 +381,7 @@
350 381 bau_desc->payload.address = va;
351 382 bau_desc->payload.sending_cpu = cpu;
352 383  
353   - return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
  384 + return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
354 385 }
355 386  
356 387 /*
... ... @@ -418,24 +449,58 @@
418 449 set_irq_regs(old_regs);
419 450 }
420 451  
  452 +/*
  453 + * uv_enable_timeouts
  454 + *
  455 + * Each target blade (i.e. each blade that has cpus) needs to have
  456 + * shootdown message timeouts enabled. The timeout does not cause
  457 + * an interrupt, but causes an error message to be returned to
  458 + * the sender.
  459 + */
421 460 static void uv_enable_timeouts(void)
422 461 {
423   - int i;
424 462 int blade;
425   - int last_blade;
  463 + int nblades;
426 464 int pnode;
427   - int cur_cpu = 0;
428   - unsigned long apicid;
  465 + unsigned long mmr_image;
429 466  
430   - last_blade = -1;
431   - for_each_online_node(i) {
432   - blade = uv_node_to_blade_id(i);
433   - if (blade == last_blade)
  467 + nblades = uv_num_possible_blades();
  468 +
  469 + for (blade = 0; blade < nblades; blade++) {
  470 + if (!uv_blade_nr_possible_cpus(blade))
434 471 continue;
435   - last_blade = blade;
436   - apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
  472 +
437 473 pnode = uv_blade_to_pnode(blade);
438   - cur_cpu += uv_blade_nr_possible_cpus(i);
  474 + mmr_image =
  475 + uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
  476 + /*
  477 + * Set the timeout period and then lock it in, in three
  478 + * steps; the last step captures and locks in the period.
  479 + *
  480 + * To program the period, the SOFT_ACK_MODE must be off.
  481 + */
  482 + mmr_image &= ~((unsigned long)1 <<
  483 + UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
  484 + uv_write_global_mmr64
  485 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
  486 + /*
  487 + * Set the 4-bit period.
  488 + */
  489 + mmr_image &= ~((unsigned long)0xf <<
  490 + UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
  491 + mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
  492 + UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
  493 + uv_write_global_mmr64
  494 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
  495 + /*
  496 + * Subsequent reversals of the timebase bit (3) cause an
  497 + * immediate timeout of one or all INTD resources as
  498 + * indicated in bits 2:0 (7 causes all of them to timeout).
  499 + */
  500 + mmr_image |= ((unsigned long)1 <<
  501 + UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
  502 + uv_write_global_mmr64
  503 + (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
439 504 }
440 505 }
441 506  
... ... @@ -482,8 +547,7 @@
482 547 stat->requestee, stat->onetlb, stat->alltlb,
483 548 stat->s_retry, stat->d_retry, stat->ptc_i);
484 549 seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
485   - uv_read_global_mmr64(uv_blade_to_pnode
486   - (uv_cpu_to_blade_id(cpu)),
  550 + uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
487 551 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
488 552 stat->sflush, stat->dflush,
489 553 stat->retriesok, stat->nomsg,
... ... @@ -617,16 +681,18 @@
617 681 * finish the initialization of the per-blade control structures
618 682 */
619 683 static void __init
620   -uv_table_bases_finish(int blade, int node, int cur_cpu,
  684 +uv_table_bases_finish(int blade,
621 685 struct bau_control *bau_tablesp,
622 686 struct bau_desc *adp)
623 687 {
624 688 struct bau_control *bcp;
625   - int i;
  689 + int cpu;
626 690  
627   - for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
628   - bcp = (struct bau_control *)&per_cpu(bau_control, i);
  691 + for_each_present_cpu(cpu) {
  692 + if (blade != uv_cpu_to_blade_id(cpu))
  693 + continue;
629 694  
  695 + bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
630 696 bcp->bau_msg_head = bau_tablesp->va_queue_first;
631 697 bcp->va_queue_first = bau_tablesp->va_queue_first;
632 698 bcp->va_queue_last = bau_tablesp->va_queue_last;
... ... @@ -649,11 +715,10 @@
649 715 struct bau_desc *adp;
650 716 struct bau_desc *ad2;
651 717  
652   - adp = (struct bau_desc *)
653   - kmalloc_node(16384, GFP_KERNEL, node);
  718 + adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
654 719 BUG_ON(!adp);
655 720  
656   - pa = __pa((unsigned long)adp);
  721 + pa = uv_gpa(adp); /* need the real nasid */
657 722 n = pa >> uv_nshift;
658 723 m = pa & uv_mmask;
659 724  
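
The switch from __pa() to uv_gpa() above matters because a UV global address carries the owning node in its upper bits; the hub needs the real nasid in the 'n' part of the descriptor base, and the payload-queue setup further down applies the same fix. A sketch of the split; uv_nshift and uv_mmask come from the hub geometry at init time, and the 37-bit node offset here is an assumption for illustration only:

/* sketch only: assumed field width, not a real topology */
static unsigned long uv_nshift = 37;
static unsigned long uv_mmask = (1UL << 37) - 1;

static void split_gpa(unsigned long gpa,
                      unsigned long *n, unsigned long *m)
{
        *n = gpa >> uv_nshift;  /* destination node for the hub */
        *m = gpa & uv_mmask;    /* offset within that node's memory */
}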
... ... @@ -667,8 +732,12 @@
667 732 for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
668 733 memset(ad2, 0, sizeof(struct bau_desc));
669 734 ad2->header.sw_ack_flag = 1;
670   - ad2->header.base_dest_nodeid =
671   - uv_blade_to_pnode(uv_cpu_to_blade_id(0));
  735 + /*
  736 + * base_dest_nodeid is the first node in the partition, so
  737 + * the bitmap will indicate partition-relative node numbers.
  738 + * Note that base_dest_nodeid is actually a nasid.
  739 + */
  740 + ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
672 741 ad2->header.command = UV_NET_ENDPOINT_INTD;
673 742 ad2->header.int_both = 1;
674 743 /*
... ... @@ -686,6 +755,8 @@
686 755 uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
687 756 {
688 757 struct bau_payload_queue_entry *pqp;
  758 + unsigned long pa;
  759 + int pn;
689 760 char *cp;
690 761  
691 762 pqp = (struct bau_payload_queue_entry *) kmalloc_node(
... ... @@ -696,10 +767,14 @@
696 767 cp = (char *)pqp + 31;
697 768 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
698 769 bau_tablesp->va_queue_first = pqp;
  770 + /*
  771 + * need the pnode where the memory was really allocated
  772 + */
  773 + pa = uv_gpa(pqp);
  774 + pn = pa >> uv_nshift;
699 775 uv_write_global_mmr64(pnode,
700 776 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
701   - ((unsigned long)pnode <<
702   - UV_PAYLOADQ_PNODE_SHIFT) |
  777 + ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
703 778 uv_physnodeaddr(pqp));
704 779 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
705 780 uv_physnodeaddr(pqp));
... ... @@ -715,8 +790,9 @@
715 790 /*
716 791 * Initialization of each UV blade's structures
717 792 */
718   -static int __init uv_init_blade(int blade, int node, int cur_cpu)
  793 +static int __init uv_init_blade(int blade)
719 794 {
  795 + int node;
720 796 int pnode;
721 797 unsigned long pa;
722 798 unsigned long apicid;
... ... @@ -724,16 +800,17 @@
724 800 struct bau_payload_queue_entry *pqp;
725 801 struct bau_control *bau_tablesp;
726 802  
  803 + node = blade_to_first_node(blade);
727 804 bau_tablesp = uv_table_bases_init(blade, node);
728 805 pnode = uv_blade_to_pnode(blade);
729 806 adp = uv_activation_descriptor_init(node, pnode);
730 807 pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
731   - uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
  808 + uv_table_bases_finish(blade, bau_tablesp, adp);
732 809 /*
733 810 * the below initialization can't be in firmware because the
734 811 * messaging IRQ will be determined by the OS
735 812 */
736   - apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
  813 + apicid = blade_to_first_apicid(blade);
737 814 pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
738 815 if ((pa & 0xff) != UV_BAU_MESSAGE) {
739 816 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
740 817  
... ... @@ -748,9 +825,7 @@
748 825 static int __init uv_bau_init(void)
749 826 {
750 827 int blade;
751   - int node;
752 828 int nblades;
753   - int last_blade;
754 829 int cur_cpu;
755 830  
756 831 if (!is_uv_system())
... ... @@ -763,29 +838,21 @@
763 838 uv_bau_retry_limit = 1;
764 839 uv_nshift = uv_hub_info->n_val;
765 840 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
766   - nblades = 0;
767   - last_blade = -1;
768   - cur_cpu = 0;
769   - for_each_online_node(node) {
770   - blade = uv_node_to_blade_id(node);
771   - if (blade == last_blade)
772   - continue;
773   - last_blade = blade;
774   - nblades++;
775   - }
  841 + nblades = uv_num_possible_blades();
  842 +
776 843 uv_bau_table_bases = (struct bau_control **)
777 844 kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
778 845 BUG_ON(!uv_bau_table_bases);
779 846  
780   - last_blade = -1;
781   - for_each_online_node(node) {
782   - blade = uv_node_to_blade_id(node);
783   - if (blade == last_blade)
784   - continue;
785   - last_blade = blade;
786   - uv_init_blade(blade, node, cur_cpu);
787   - cur_cpu += uv_blade_nr_possible_cpus(blade);
788   - }
  847 + uv_partition_base_pnode = 0x7fffffff;
  848 + for (blade = 0; blade < nblades; blade++)
  849 + if (uv_blade_nr_possible_cpus(blade) &&
  850 + (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
  851 + uv_partition_base_pnode = uv_blade_to_pnode(blade);
  852 + for (blade = 0; blade < nblades; blade++)
  853 + if (uv_blade_nr_possible_cpus(blade))
  854 + uv_init_blade(blade);
  855 +
789 856 alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
790 857 uv_enable_timeouts();
791 858  
arch/x86/kernel/uv_sysfs.c
... ... @@ -21,6 +21,7 @@
21 21  
22 22 #include <linux/sysdev.h>
23 23 #include <asm/uv/bios.h>
  24 +#include <asm/uv/uv.h>
24 25  
25 26 struct kobject *sgi_uv_kobj;
26 27  
... ... @@ -46,6 +47,9 @@
46 47 static int __init sgi_uv_sysfs_init(void)
47 48 {
48 49 unsigned long ret;
  50 +
  51 + if (!is_uv_system())
  52 + return -ENODEV;
49 53  
50 54 if (!sgi_uv_kobj)
51 55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);