Commit 4f0234f4f9da485ecb9729af1b88567700fd4767

Authored by David S. Miller
1 parent b3e13fbeb9

[SPARC64]: Initial LDOM cpu hotplug support.

Only adding cpus is supported at the moment; removal
will come next.

When new cpus are configured, the machine description is
updated.  When we get the configure request we pass in a
cpu mask of to-be-added cpus to the mdesc CPU node parser
so it only fetches information for those cpus.  That code
also proceeds to update the SMT/multi-core scheduling bitmaps.

cpu_up() does all the work and we return the status back
over the DS channel.

CPUs added via dr-cpu need to be booted straight out of the
hypervisor, and this requires:

1) A new trampoline mechanism.  CPUs are booted straight
   out of the hypervisor with MMU disabled and running in
   physical addresses with no mappings installed in the TLB.

   The new hvtramp.S code sets up the critical cpu state,
   installs the locked TLB mappings for the kernel, and
   turns the MMU on.  It then proceeds to follow the logic
   of the existing trampoline.S SMP cpu bringup code.

2) All calls into OBP have to be disallowed when domaining
   is enabled.  Since cpus boot straight into the kernel from
   the hypervisor, OBP has no state about that cpu and therefore
   cannot handle being invoked on that cpu.

   Luckily it's only a handful of interfaces which can be called
   after the OBP device tree is obtained.  For example, rebooting,
   halting, powering-off, and setting options node variables.

CPU removal support will require some infrastructure changes
here.  Namely we'll have to process the requests via a true
kernel thread instead of in a workqueue.  Workqueues run on
a per-cpu thread, but when unconfiguring we might need to
force the thread to execute on another cpu if the current cpu
is the one being removed.  Removal of a cpu also causes the kernel
to destroy that cpu's workqueue thread.

Another issue on removal is that we may have interrupts still
pointing to the cpu-to-be-removed.  So new code will be needed
to walk the active INO list and retarget those interrupts as needed.

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 16 changed files with 716 additions and 128 deletions Side-by-side Diff

arch/sparc64/Kconfig
... ... @@ -108,6 +108,15 @@
108 108  
109 109 source kernel/Kconfig.hz
110 110  
  111 +config HOTPLUG_CPU
  112 + bool "Support for hot-pluggable CPUs"
  113 + depends on SMP
  114 + select HOTPLUG
  115 + ---help---
  116 + Say Y here to experiment with turning CPUs off and on. CPUs
  117 + can be controlled through /sys/devices/system/cpu/cpu#.
  118 + Say N if you want to disable CPU hotplug.
  119 +
111 120 source "init/Kconfig"
112 121  
113 122 config SYSVIPC_COMPAT
... ... @@ -307,6 +316,7 @@
307 316  
308 317 config SUN_LDOMS
309 318 bool "Sun Logical Domains support"
  319 + select HOTPLUG_CPU
310 320 help
311 321 Say Y here is you want to support virtual devices via
312 322 Logical Domains.
arch/sparc64/kernel/Makefile
... ... @@ -12,7 +12,8 @@
12 12 irq.o ptrace.o time.o sys_sparc.o signal.o \
13 13 unaligned.o central.o pci.o starfire.o semaphore.o \
14 14 power.o sbus.o iommu_common.o sparc64_ksyms.o chmc.o \
15   - visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
  15 + visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o \
  16 + hvtramp.o
16 17  
17 18 obj-$(CONFIG_STACKTRACE) += stacktrace.o
18 19 obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \
arch/sparc64/kernel/ds.c
... ... @@ -12,11 +12,16 @@
12 12 #include <linux/sched.h>
13 13 #include <linux/delay.h>
14 14 #include <linux/mutex.h>
  15 +#include <linux/workqueue.h>
  16 +#include <linux/cpu.h>
15 17  
16 18 #include <asm/ldc.h>
17 19 #include <asm/vio.h>
18 20 #include <asm/power.h>
19 21 #include <asm/mdesc.h>
  22 +#include <asm/head.h>
  23 +#include <asm/io.h>
  24 +#include <asm/hvtramp.h>
20 25  
21 26 #define DRV_MODULE_NAME "ds"
22 27 #define PFX DRV_MODULE_NAME ": "
... ... @@ -124,7 +129,7 @@
124 129 __u64 handle;
125 130  
126 131 void (*data)(struct ldc_channel *lp,
127   - struct ds_cap_state *dp,
  132 + struct ds_cap_state *cp,
128 133 void *buf, int len);
129 134  
130 135 const char *service_id;
... ... @@ -135,6 +140,91 @@
135 140 #define CAP_STATE_REGISTERED 0x02
136 141 };
137 142  
  143 +static void md_update_data(struct ldc_channel *lp, struct ds_cap_state *cp,
  144 + void *buf, int len);
  145 +static void domain_shutdown_data(struct ldc_channel *lp,
  146 + struct ds_cap_state *cp,
  147 + void *buf, int len);
  148 +static void domain_panic_data(struct ldc_channel *lp,
  149 + struct ds_cap_state *cp,
  150 + void *buf, int len);
  151 +static void dr_cpu_data(struct ldc_channel *lp,
  152 + struct ds_cap_state *cp,
  153 + void *buf, int len);
  154 +static void ds_pri_data(struct ldc_channel *lp,
  155 + struct ds_cap_state *cp,
  156 + void *buf, int len);
  157 +static void ds_var_data(struct ldc_channel *lp,
  158 + struct ds_cap_state *cp,
  159 + void *buf, int len);
  160 +
  161 +struct ds_cap_state ds_states[] = {
  162 + {
  163 + .service_id = "md-update",
  164 + .data = md_update_data,
  165 + },
  166 + {
  167 + .service_id = "domain-shutdown",
  168 + .data = domain_shutdown_data,
  169 + },
  170 + {
  171 + .service_id = "domain-panic",
  172 + .data = domain_panic_data,
  173 + },
  174 + {
  175 + .service_id = "dr-cpu",
  176 + .data = dr_cpu_data,
  177 + },
  178 + {
  179 + .service_id = "pri",
  180 + .data = ds_pri_data,
  181 + },
  182 + {
  183 + .service_id = "var-config",
  184 + .data = ds_var_data,
  185 + },
  186 + {
  187 + .service_id = "var-config-backup",
  188 + .data = ds_var_data,
  189 + },
  190 +};
  191 +
  192 +static DEFINE_SPINLOCK(ds_lock);
  193 +
  194 +struct ds_info {
  195 + struct ldc_channel *lp;
  196 + u8 hs_state;
  197 +#define DS_HS_START 0x01
  198 +#define DS_HS_DONE 0x02
  199 +
  200 + void *rcv_buf;
  201 + int rcv_buf_len;
  202 +};
  203 +
  204 +static struct ds_info *ds_info;
  205 +
  206 +static struct ds_cap_state *find_cap(u64 handle)
  207 +{
  208 + unsigned int index = handle >> 32;
  209 +
  210 + if (index >= ARRAY_SIZE(ds_states))
  211 + return NULL;
  212 + return &ds_states[index];
  213 +}
  214 +
  215 +static struct ds_cap_state *find_cap_by_string(const char *name)
  216 +{
  217 + int i;
  218 +
  219 + for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
  220 + if (strcmp(ds_states[i].service_id, name))
  221 + continue;
  222 +
  223 + return &ds_states[i];
  224 + }
  225 + return NULL;
  226 +}
  227 +
138 228 static int ds_send(struct ldc_channel *lp, void *data, int len)
139 229 {
140 230 int err, limit = 1000;
141 231  
142 232  
143 233  
144 234  
145 235  
146 236  
147 237  
148 238  
... ... @@ -265,36 +355,354 @@
265 355 panic("PANIC requested by LDOM manager.");
266 356 }
267 357  
268   -struct ds_cpu_tag {
  358 +struct dr_cpu_tag {
269 359 __u64 req_num;
270 360 __u32 type;
271   -#define DS_CPU_CONFIGURE 0x43
272   -#define DS_CPU_UNCONFIGURE 0x55
273   -#define DS_CPU_FORCE_UNCONFIGURE 0x46
274   -#define DS_CPU_STATUS 0x53
  361 +#define DR_CPU_CONFIGURE 0x43
  362 +#define DR_CPU_UNCONFIGURE 0x55
  363 +#define DR_CPU_FORCE_UNCONFIGURE 0x46
  364 +#define DR_CPU_STATUS 0x53
275 365  
276 366 /* Responses */
277   -#define DS_CPU_OK 0x6f
278   -#define DS_CPU_ERROR 0x65
  367 +#define DR_CPU_OK 0x6f
  368 +#define DR_CPU_ERROR 0x65
279 369  
280 370 __u32 num_records;
281 371 };
282 372  
283   -struct ds_cpu_record {
284   - __u32 cpu_id;
  373 +struct dr_cpu_resp_entry {
  374 + __u32 cpu;
  375 + __u32 result;
  376 +#define DR_CPU_RES_OK 0x00
  377 +#define DR_CPU_RES_FAILURE 0x01
  378 +#define DR_CPU_RES_BLOCKED 0x02
  379 +#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
  380 +#define DR_CPU_RES_NOT_IN_MD 0x04
  381 +
  382 + __u32 stat;
  383 +#define DR_CPU_STAT_NOT_PRESENT 0x00
  384 +#define DR_CPU_STAT_UNCONFIGURED 0x01
  385 +#define DR_CPU_STAT_CONFIGURED 0x02
  386 +
  387 + __u32 str_off;
285 388 };
286 389  
  390 +/* XXX Put this in some common place. XXX */
  391 +static unsigned long kimage_addr_to_ra(void *p)
  392 +{
  393 + unsigned long val = (unsigned long) p;
  394 +
  395 + return kern_base + (val - KERNBASE);
  396 +}
  397 +
  398 +void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
  399 +{
  400 + extern unsigned long sparc64_ttable_tl0;
  401 + extern unsigned long kern_locked_tte_data;
  402 + extern int bigkernel;
  403 + struct hvtramp_descr *hdesc;
  404 + unsigned long trampoline_ra;
  405 + struct trap_per_cpu *tb;
  406 + u64 tte_vaddr, tte_data;
  407 + unsigned long hv_err;
  408 +
  409 + hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
  410 + if (!hdesc) {
  411 + printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate "
  412 + "hvtramp_descr.\n");
  413 + return;
  414 + }
  415 +
  416 + hdesc->cpu = cpu;
  417 + hdesc->num_mappings = (bigkernel ? 2 : 1);
  418 +
  419 + tb = &trap_block[cpu];
  420 + tb->hdesc = hdesc;
  421 +
  422 + hdesc->fault_info_va = (unsigned long) &tb->fault_info;
  423 + hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
  424 +
  425 + hdesc->thread_reg = thread_reg;
  426 +
  427 + tte_vaddr = (unsigned long) KERNBASE;
  428 + tte_data = kern_locked_tte_data;
  429 +
  430 + hdesc->maps[0].vaddr = tte_vaddr;
  431 + hdesc->maps[0].tte = tte_data;
  432 + if (bigkernel) {
  433 + tte_vaddr += 0x400000;
  434 + tte_data += 0x400000;
  435 + hdesc->maps[1].vaddr = tte_vaddr;
  436 + hdesc->maps[1].tte = tte_data;
  437 + }
  438 +
  439 + trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
  440 +
  441 + hv_err = sun4v_cpu_start(cpu, trampoline_ra,
  442 + kimage_addr_to_ra(&sparc64_ttable_tl0),
  443 + __pa(hdesc));
  444 +}
  445 +
  446 +/* DR cpu requests get queued onto the work list by the
  447 + * dr_cpu_data() callback. The list is protected by
  448 + * ds_lock, and processed by dr_cpu_process() in order.
  449 + */
  450 +static LIST_HEAD(dr_cpu_work_list);
  451 +
  452 +struct dr_cpu_queue_entry {
  453 + struct list_head list;
  454 + char req[0];
  455 +};
  456 +
  457 +static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
  458 +{
  459 + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
  460 + struct ds_info *dp = ds_info;
  461 + struct {
  462 + struct ds_data data;
  463 + struct dr_cpu_tag tag;
  464 + } pkt;
  465 + int msg_len;
  466 +
  467 + memset(&pkt, 0, sizeof(pkt));
  468 + pkt.data.tag.type = DS_DATA;
  469 + pkt.data.handle = cp->handle;
  470 + pkt.tag.req_num = tag->req_num;
  471 + pkt.tag.type = DR_CPU_ERROR;
  472 + pkt.tag.num_records = 0;
  473 +
  474 + msg_len = (sizeof(struct ds_data) +
  475 + sizeof(struct dr_cpu_tag));
  476 +
  477 + pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
  478 +
  479 + ds_send(dp->lp, &pkt, msg_len);
  480 +}
  481 +
  482 +static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
  483 +{
  484 + unsigned long flags;
  485 +
  486 + spin_lock_irqsave(&ds_lock, flags);
  487 + __dr_cpu_send_error(cp, data);
  488 + spin_unlock_irqrestore(&ds_lock, flags);
  489 +}
  490 +
  491 +#define CPU_SENTINEL 0xffffffff
  492 +
  493 +static void purge_dups(u32 *list, u32 num_ents)
  494 +{
  495 + unsigned int i;
  496 +
  497 + for (i = 0; i < num_ents; i++) {
  498 + u32 cpu = list[i];
  499 + unsigned int j;
  500 +
  501 + if (cpu == CPU_SENTINEL)
  502 + continue;
  503 +
  504 + for (j = i + 1; j < num_ents; j++) {
  505 + if (list[j] == cpu)
  506 + list[j] = CPU_SENTINEL;
  507 + }
  508 + }
  509 +}
  510 +
  511 +static int dr_cpu_size_response(int ncpus)
  512 +{
  513 + return (sizeof(struct ds_data) +
  514 + sizeof(struct dr_cpu_tag) +
  515 + (sizeof(struct dr_cpu_resp_entry) * ncpus));
  516 +}
  517 +
  518 +static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
  519 + u64 handle, int resp_len, int ncpus,
  520 + cpumask_t *mask, u32 default_stat)
  521 +{
  522 + struct dr_cpu_resp_entry *ent;
  523 + struct dr_cpu_tag *tag;
  524 + int i, cpu;
  525 +
  526 + tag = (struct dr_cpu_tag *) (resp + 1);
  527 + ent = (struct dr_cpu_resp_entry *) (tag + 1);
  528 +
  529 + resp->tag.type = DS_DATA;
  530 + resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
  531 + resp->handle = handle;
  532 + tag->req_num = req_num;
  533 + tag->type = DR_CPU_OK;
  534 + tag->num_records = ncpus;
  535 +
  536 + i = 0;
  537 + for_each_cpu_mask(cpu, *mask) {
  538 + ent[i].cpu = cpu;
  539 + ent[i].result = DR_CPU_RES_OK;
  540 + ent[i].stat = default_stat;
  541 + i++;
  542 + }
  543 + BUG_ON(i != ncpus);
  544 +}
  545 +
  546 +static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
  547 + u32 res, u32 stat)
  548 +{
  549 + struct dr_cpu_resp_entry *ent;
  550 + struct dr_cpu_tag *tag;
  551 + int i;
  552 +
  553 + tag = (struct dr_cpu_tag *) (resp + 1);
  554 + ent = (struct dr_cpu_resp_entry *) (tag + 1);
  555 +
  556 + for (i = 0; i < ncpus; i++) {
  557 + if (ent[i].cpu != cpu)
  558 + continue;
  559 + ent[i].result = res;
  560 + ent[i].stat = stat;
  561 + break;
  562 + }
  563 +}
  564 +
  565 +static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num,
  566 + cpumask_t *mask)
  567 +{
  568 + struct ds_data *resp;
  569 + int resp_len, ncpus, cpu;
  570 + unsigned long flags;
  571 +
  572 + ncpus = cpus_weight(*mask);
  573 + resp_len = dr_cpu_size_response(ncpus);
  574 + resp = kzalloc(resp_len, GFP_KERNEL);
  575 + if (!resp)
  576 + return -ENOMEM;
  577 +
  578 + dr_cpu_init_response(resp, req_num, cp->handle,
  579 + resp_len, ncpus, mask,
  580 + DR_CPU_STAT_CONFIGURED);
  581 +
  582 + mdesc_fill_in_cpu_data(*mask);
  583 +
  584 + for_each_cpu_mask(cpu, *mask) {
  585 + int err;
  586 +
  587 + printk(KERN_INFO PFX "Starting cpu %d...\n", cpu);
  588 + err = cpu_up(cpu);
  589 + if (err)
  590 + dr_cpu_mark(resp, cpu, ncpus,
  591 + DR_CPU_RES_FAILURE,
  592 + DR_CPU_STAT_UNCONFIGURED);
  593 + }
  594 +
  595 + spin_lock_irqsave(&ds_lock, flags);
  596 + ds_send(ds_info->lp, resp, resp_len);
  597 + spin_unlock_irqrestore(&ds_lock, flags);
  598 +
  599 + kfree(resp);
  600 +
  601 + return 0;
  602 +}
  603 +
  604 +static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
  605 + cpumask_t *mask)
  606 +{
  607 + struct ds_data *resp;
  608 + int resp_len, ncpus;
  609 +
  610 + ncpus = cpus_weight(*mask);
  611 + resp_len = dr_cpu_size_response(ncpus);
  612 + resp = kzalloc(resp_len, GFP_KERNEL);
  613 + if (!resp)
  614 + return -ENOMEM;
  615 +
  616 + dr_cpu_init_response(resp, req_num, cp->handle,
  617 + resp_len, ncpus, mask,
  618 + DR_CPU_STAT_UNCONFIGURED);
  619 +
  620 + kfree(resp);
  621 +
  622 + return -EOPNOTSUPP;
  623 +}
  624 +
  625 +static void dr_cpu_process(struct work_struct *work)
  626 +{
  627 + struct dr_cpu_queue_entry *qp, *tmp;
  628 + struct ds_cap_state *cp;
  629 + unsigned long flags;
  630 + LIST_HEAD(todo);
  631 + cpumask_t mask;
  632 +
  633 + cp = find_cap_by_string("dr-cpu");
  634 +
  635 + spin_lock_irqsave(&ds_lock, flags);
  636 + list_splice(&dr_cpu_work_list, &todo);
  637 + spin_unlock_irqrestore(&ds_lock, flags);
  638 +
  639 + list_for_each_entry_safe(qp, tmp, &todo, list) {
  640 + struct ds_data *data = (struct ds_data *) qp->req;
  641 + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
  642 + u32 *cpu_list = (u32 *) (tag + 1);
  643 + u64 req_num = tag->req_num;
  644 + unsigned int i;
  645 + int err;
  646 +
  647 + switch (tag->type) {
  648 + case DR_CPU_CONFIGURE:
  649 + case DR_CPU_UNCONFIGURE:
  650 + case DR_CPU_FORCE_UNCONFIGURE:
  651 + break;
  652 +
  653 + default:
  654 + dr_cpu_send_error(cp, data);
  655 + goto next;
  656 + }
  657 +
  658 + purge_dups(cpu_list, tag->num_records);
  659 +
  660 + cpus_clear(mask);
  661 + for (i = 0; i < tag->num_records; i++) {
  662 + if (cpu_list[i] == CPU_SENTINEL)
  663 + continue;
  664 +
  665 + if (cpu_list[i] < NR_CPUS)
  666 + cpu_set(cpu_list[i], mask);
  667 + }
  668 +
  669 + if (tag->type == DR_CPU_CONFIGURE)
  670 + err = dr_cpu_configure(cp, req_num, &mask);
  671 + else
  672 + err = dr_cpu_unconfigure(cp, req_num, &mask);
  673 +
  674 + if (err)
  675 + dr_cpu_send_error(cp, data);
  676 +
  677 +next:
  678 + list_del(&qp->list);
  679 + kfree(qp);
  680 + }
  681 +}
  682 +
  683 +static DECLARE_WORK(dr_cpu_work, dr_cpu_process);
  684 +
287 685 static void dr_cpu_data(struct ldc_channel *lp,
288 686 struct ds_cap_state *dp,
289 687 void *buf, int len)
290 688 {
  689 + struct dr_cpu_queue_entry *qp;
291 690 struct ds_data *dpkt = buf;
292   - struct ds_cpu_tag *rp;
  691 + struct dr_cpu_tag *rp;
293 692  
294   - rp = (struct ds_cpu_tag *) (dpkt + 1);
  693 + rp = (struct dr_cpu_tag *) (dpkt + 1);
295 694  
296   - printk(KERN_ERR PFX "CPU REQ [%lx:%x], len=%d\n",
297   - rp->req_num, rp->type, len);
  695 + qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC);
  696 + if (!qp) {
  697 + struct ds_cap_state *cp;
  698 +
  699 + cp = find_cap_by_string("dr-cpu");
  700 + __dr_cpu_send_error(cp, dpkt);
  701 + } else {
  702 + memcpy(&qp->req, buf, len);
  703 + list_add_tail(&qp->list, &dr_cpu_work_list);
  704 + schedule_work(&dr_cpu_work);
  705 + }
298 706 }
299 707  
300 708 struct ds_pri_msg {
... ... @@ -368,73 +776,6 @@
368 776 ds_var_doorbell = 1;
369 777 }
370 778  
371   -struct ds_cap_state ds_states[] = {
372   - {
373   - .service_id = "md-update",
374   - .data = md_update_data,
375   - },
376   - {
377   - .service_id = "domain-shutdown",
378   - .data = domain_shutdown_data,
379   - },
380   - {
381   - .service_id = "domain-panic",
382   - .data = domain_panic_data,
383   - },
384   - {
385   - .service_id = "dr-cpu",
386   - .data = dr_cpu_data,
387   - },
388   - {
389   - .service_id = "pri",
390   - .data = ds_pri_data,
391   - },
392   - {
393   - .service_id = "var-config",
394   - .data = ds_var_data,
395   - },
396   - {
397   - .service_id = "var-config-backup",
398   - .data = ds_var_data,
399   - },
400   -};
401   -
402   -static DEFINE_SPINLOCK(ds_lock);
403   -
404   -struct ds_info {
405   - struct ldc_channel *lp;
406   - u8 hs_state;
407   -#define DS_HS_START 0x01
408   -#define DS_HS_DONE 0x02
409   -
410   - void *rcv_buf;
411   - int rcv_buf_len;
412   -};
413   -
414   -static struct ds_info *ds_info;
415   -
416   -static struct ds_cap_state *find_cap(u64 handle)
417   -{
418   - unsigned int index = handle >> 32;
419   -
420   - if (index >= ARRAY_SIZE(ds_states))
421   - return NULL;
422   - return &ds_states[index];
423   -}
424   -
425   -static struct ds_cap_state *find_cap_by_string(const char *name)
426   -{
427   - int i;
428   -
429   - for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
430   - if (strcmp(ds_states[i].service_id, name))
431   - continue;
432   -
433   - return &ds_states[i];
434   - }
435   - return NULL;
436   -}
437   -
438 779 void ldom_set_var(const char *var, const char *value)
439 780 {
440 781 struct ds_info *dp = ds_info;
... ... @@ -467,8 +808,8 @@
467 808 p += strlen(value) + 1;
468 809  
469 810 msg_len = (sizeof(struct ds_data) +
470   - sizeof(struct ds_var_set_msg) +
471   - (p - base));
  811 + sizeof(struct ds_var_set_msg) +
  812 + (p - base));
472 813 msg_len = (msg_len + 3) & ~3;
473 814 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
474 815  
... ... @@ -520,6 +861,11 @@
520 861 sun4v_mach_sir();
521 862 }
522 863  
  864 +void ldom_power_off(void)
  865 +{
  866 + sun4v_mach_exit(0);
  867 +}
  868 +
523 869 static void ds_conn_reset(struct ds_info *dp)
524 870 {
525 871 printk(KERN_ERR PFX "ds_conn_reset() from %p\n",
... ... @@ -601,7 +947,7 @@
601 947 np->handle);
602 948 return 0;
603 949 }
604   - printk(KERN_ERR PFX "Could not register %s service\n",
  950 + printk(KERN_INFO PFX "Could not register %s service\n",
605 951 cp->service_id);
606 952 cp->state = CAP_STATE_UNKNOWN;
607 953 }
arch/sparc64/kernel/hvtramp.S
  1 +/* hvtramp.S: Hypervisor start-cpu trampoline code.
  2 + *
  3 + * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
  4 + */
  5 +
  6 +#include <asm/thread_info.h>
  7 +#include <asm/hypervisor.h>
  8 +#include <asm/scratchpad.h>
  9 +#include <asm/spitfire.h>
  10 +#include <asm/hvtramp.h>
  11 +#include <asm/pstate.h>
  12 +#include <asm/ptrace.h>
  13 +#include <asm/asi.h>
  14 +
  15 + .text
  16 + .align 8
  17 + .globl hv_cpu_startup, hv_cpu_startup_end
  18 +
  19 + /* This code executes directly out of the hypervisor
  20 + * with physical addressing (va==pa). %o0 contains
  21 + * our client argument which for Linux points to
  22 + * a descriptor data structure which defines the
  23 + * MMU entries we need to load up.
  24 + *
  25 + * After we set things up we enable the MMU and call
  26 + * into the kernel.
  27 + *
  28 + * First setup basic privileged cpu state.
  29 + */
  30 +hv_cpu_startup:
  31 + wrpr %g0, 0, %gl
  32 + wrpr %g0, 15, %pil
  33 + wrpr %g0, 0, %canrestore
  34 + wrpr %g0, 0, %otherwin
  35 + wrpr %g0, 6, %cansave
  36 + wrpr %g0, 6, %cleanwin
  37 + wrpr %g0, 0, %cwp
  38 + wrpr %g0, 0, %wstate
  39 + wrpr %g0, 0, %tl
  40 +
  41 + sethi %hi(sparc64_ttable_tl0), %g1
  42 + wrpr %g1, %tba
  43 +
  44 + mov %o0, %l0
  45 +
  46 + lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
  47 + mov SCRATCHPAD_CPUID, %g2
  48 + stxa %g1, [%g2] ASI_SCRATCHPAD
  49 +
  50 + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
  51 + stxa %g2, [%g0] ASI_SCRATCHPAD
  52 +
  53 + mov 0, %l1
  54 + lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
  55 + add %l0, HVTRAMP_DESCR_MAPS, %l3
  56 +
  57 +1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
  58 + clr %o1
  59 + ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
  60 + mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
  61 + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
  62 + ta HV_FAST_TRAP
  63 +
  64 + brnz,pn %o0, 80f
  65 + nop
  66 +
  67 + add %l1, 1, %l1
  68 + cmp %l1, %l2
  69 + blt,a,pt %xcc, 1b
  70 + add %l3, HVTRAMP_MAPPING_SIZE, %l3
  71 +
  72 + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
  73 + mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
  74 + ta HV_FAST_TRAP
  75 +
  76 + brnz,pn %o0, 80f
  77 + nop
  78 +
  79 + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
  80 +
  81 + ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
  82 +
  83 + mov 1, %o0
  84 + set 1f, %o1
  85 + mov HV_FAST_MMU_ENABLE, %o5
  86 + ta HV_FAST_TRAP
  87 +
  88 + ba,pt %xcc, 80f
  89 + nop
  90 +
  91 +1:
  92 + wr %g0, 0, %fprs
  93 + wr %g0, ASI_P, %asi
  94 +
  95 + mov PRIMARY_CONTEXT, %g7
  96 + stxa %g0, [%g7] ASI_MMU
  97 + membar #Sync
  98 +
  99 + mov SECONDARY_CONTEXT, %g7
  100 + stxa %g0, [%g7] ASI_MMU
  101 + membar #Sync
  102 +
  103 + mov %l6, %g6
  104 + ldx [%g6 + TI_TASK], %g4
  105 +
  106 + mov 1, %g5
  107 + sllx %g5, THREAD_SHIFT, %g5
  108 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
  109 + add %g6, %g5, %sp
  110 + mov 0, %fp
  111 +
  112 + call init_irqwork_curcpu
  113 + nop
  114 + call hard_smp_processor_id
  115 + nop
  116 +
  117 + mov %o0, %o1
  118 + mov 0, %o0
  119 + mov 0, %o2
  120 + call sun4v_init_mondo_queues
  121 + mov 1, %o3
  122 +
  123 + call init_cur_cpu_trap
  124 + mov %g6, %o0
  125 +
  126 + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
  127 +
  128 + call smp_callin
  129 + nop
  130 + call cpu_idle
  131 + mov 0, %o0
  132 + call cpu_panic
  133 + nop
  134 +
  135 +80: ba,pt %xcc, 80b
  136 + nop
  137 +
  138 + .align 8
  139 +hv_cpu_startup_end:
arch/sparc64/kernel/mdesc.c
... ... @@ -434,6 +434,22 @@
434 434 if (v)
435 435 printk("PLATFORM: max-cpus [%lu]\n", *v);
436 436  
  437 +#ifdef CONFIG_SMP
  438 + {
  439 + int max_cpu, i;
  440 +
  441 + if (v) {
  442 + max_cpu = *v;
  443 + if (max_cpu > NR_CPUS)
  444 + max_cpu = NR_CPUS;
  445 + } else {
  446 + max_cpu = NR_CPUS;
  447 + }
  448 + for (i = 0; i < max_cpu; i++)
  449 + cpu_set(i, cpu_possible_map);
  450 + }
  451 +#endif
  452 +
437 453 mdesc_release(hp);
438 454 }
439 455  
... ... @@ -451,9 +467,9 @@
451 467 return 0;
452 468 }
453 469  
454   -static void __init fill_in_one_cache(cpuinfo_sparc *c,
455   - struct mdesc_handle *hp,
456   - u64 mp)
  470 +static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
  471 + struct mdesc_handle *hp,
  472 + u64 mp)
457 473 {
458 474 const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
459 475 const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
... ... @@ -496,7 +512,8 @@
496 512 }
497 513 }
498 514  
499   -static void __init mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
  515 +static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
  516 + int core_id)
500 517 {
501 518 u64 a;
502 519  
... ... @@ -529,7 +546,7 @@
529 546 }
530 547 }
531 548  
532   -static void __init set_core_ids(struct mdesc_handle *hp)
  549 +static void __devinit set_core_ids(struct mdesc_handle *hp)
533 550 {
534 551 int idx;
535 552 u64 mp;
... ... @@ -554,7 +571,8 @@
554 571 }
555 572 }
556 573  
557   -static void __init mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
  574 +static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
  575 + int proc_id)
558 576 {
559 577 u64 a;
560 578  
... ... @@ -573,8 +591,8 @@
573 591 }
574 592 }
575 593  
576   -static void __init __set_proc_ids(struct mdesc_handle *hp,
577   - const char *exec_unit_name)
  594 +static void __devinit __set_proc_ids(struct mdesc_handle *hp,
  595 + const char *exec_unit_name)
578 596 {
579 597 int idx;
580 598 u64 mp;
581 599  
... ... @@ -595,13 +613,14 @@
595 613 }
596 614 }
597 615  
598   -static void __init set_proc_ids(struct mdesc_handle *hp)
  616 +static void __devinit set_proc_ids(struct mdesc_handle *hp)
599 617 {
600 618 __set_proc_ids(hp, "exec_unit");
601 619 __set_proc_ids(hp, "exec-unit");
602 620 }
603 621  
604   -static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def)
  622 +static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
  623 + unsigned char def)
605 624 {
606 625 u64 val;
607 626  
... ... @@ -619,8 +638,8 @@
619 638 *mask = ((1U << def) * 64U) - 1U;
620 639 }
621 640  
622   -static void __init get_mondo_data(struct mdesc_handle *hp, u64 mp,
623   - struct trap_per_cpu *tb)
  641 +static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
  642 + struct trap_per_cpu *tb)
624 643 {
625 644 const u64 *val;
626 645  
... ... @@ -637,7 +656,7 @@
637 656 get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
638 657 }
639 658  
640   -static void __init mdesc_fill_in_cpu_data(void)
  659 +void __devinit mdesc_fill_in_cpu_data(cpumask_t mask)
641 660 {
642 661 struct mdesc_handle *hp = mdesc_grab();
643 662 u64 mp;
... ... @@ -658,6 +677,8 @@
658 677 #ifdef CONFIG_SMP
659 678 if (cpuid >= NR_CPUS)
660 679 continue;
  680 + if (!cpu_isset(cpuid, mask))
  681 + continue;
661 682 #else
662 683 /* On uniprocessor we only want the values for the
663 684 * real physical cpu the kernel booted onto, however
... ... @@ -696,7 +717,6 @@
696 717  
697 718 #ifdef CONFIG_SMP
698 719 cpu_set(cpuid, cpu_present_map);
699   - cpu_set(cpuid, phys_cpu_present_map);
700 720 #endif
701 721  
702 722 c->core_id = 0;
... ... @@ -719,6 +739,7 @@
719 739 {
720 740 struct mdesc_handle *hp;
721 741 unsigned long len, real_len, status;
  742 + cpumask_t mask;
722 743  
723 744 (void) sun4v_mach_desc(0UL, 0UL, &len);
724 745  
... ... @@ -742,6 +763,8 @@
742 763 cur_mdesc = hp;
743 764  
744 765 report_platform_properties();
745   - mdesc_fill_in_cpu_data();
  766 +
  767 + cpus_setall(mask);
  768 + mdesc_fill_in_cpu_data(mask);
746 769 }
arch/sparc64/kernel/prom.c
... ... @@ -1808,7 +1808,7 @@
1808 1808  
1809 1809 #ifdef CONFIG_SMP
1810 1810 cpu_set(cpuid, cpu_present_map);
1811   - cpu_set(cpuid, phys_cpu_present_map);
  1811 + cpu_set(cpuid, cpu_possible_map);
1812 1812 #endif
1813 1813 }
1814 1814  
arch/sparc64/kernel/smp.c
... ... @@ -41,6 +41,7 @@
41 41 #include <asm/sections.h>
42 42 #include <asm/prom.h>
43 43 #include <asm/mdesc.h>
  44 +#include <asm/ldc.h>
44 45  
45 46 extern void calibrate_delay(void);
46 47  
47 48  
48 49  
... ... @@ -49,12 +50,18 @@
49 50 /* Please don't make this stuff initdata!!! --DaveM */
50 51 unsigned char boot_cpu_id;
51 52  
  53 +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
52 54 cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
53   -cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
54 55 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
55 56 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
56 57 cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
57 58 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
  59 +
  60 +EXPORT_SYMBOL(cpu_possible_map);
  61 +EXPORT_SYMBOL(cpu_online_map);
  62 +EXPORT_SYMBOL(cpu_sibling_map);
  63 +EXPORT_SYMBOL(cpu_core_map);
  64 +
58 65 static cpumask_t smp_commenced_mask;
59 66 static cpumask_t cpu_callout_map;
60 67  
61 68  
... ... @@ -84,9 +91,10 @@
84 91  
85 92 static volatile unsigned long callin_flag = 0;
86 93  
87   -void __init smp_callin(void)
  94 +void __devinit smp_callin(void)
88 95 {
89 96 int cpuid = hard_smp_processor_id();
  97 + struct trap_per_cpu *tb = &trap_block[cpuid];;
90 98  
91 99 __local_per_cpu_offset = __per_cpu_offset(cpuid);
92 100  
... ... @@ -117,6 +125,11 @@
117 125 atomic_inc(&init_mm.mm_count);
118 126 current->active_mm = &init_mm;
119 127  
  128 + if (tb->hdesc) {
  129 + kfree(tb->hdesc);
  130 + tb->hdesc = NULL;
  131 + }
  132 +
120 133 while (!cpu_isset(cpuid, smp_commenced_mask))
121 134 rmb();
122 135  
123 136  
... ... @@ -296,14 +309,20 @@
296 309 /* Alloc the mondo queues, cpu will load them. */
297 310 sun4v_init_mondo_queues(0, cpu, 1, 0);
298 311  
299   - prom_startcpu_cpuid(cpu, entry, cookie);
  312 +#ifdef CONFIG_SUN_LDOMS
  313 + if (ldom_domaining_enabled)
  314 + ldom_startcpu_cpuid(cpu,
  315 + (unsigned long) cpu_new_thread);
  316 + else
  317 +#endif
  318 + prom_startcpu_cpuid(cpu, entry, cookie);
300 319 } else {
301 320 struct device_node *dp = of_find_node_by_cpuid(cpu);
302 321  
303 322 prom_startcpu(dp->node, entry, cookie);
304 323 }
305 324  
306   - for (timeout = 0; timeout < 5000000; timeout++) {
  325 + for (timeout = 0; timeout < 50000; timeout++) {
307 326 if (callin_flag)
308 327 break;
309 328 udelay(100);
310 329  
... ... @@ -1163,22 +1182,8 @@
1163 1182 return -EINVAL;
1164 1183 }
1165 1184  
1166   -/* Constrain the number of cpus to max_cpus. */
1167 1185 void __init smp_prepare_cpus(unsigned int max_cpus)
1168 1186 {
1169   - int i;
1170   -
1171   - if (num_possible_cpus() > max_cpus) {
1172   - for_each_possible_cpu(i) {
1173   - if (i != boot_cpu_id) {
1174   - cpu_clear(i, phys_cpu_present_map);
1175   - cpu_clear(i, cpu_present_map);
1176   - if (num_possible_cpus() <= max_cpus)
1177   - break;
1178   - }
1179   - }
1180   - }
1181   -
1182 1187 cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
1183 1188 }
1184 1189  
... ... @@ -1241,6 +1246,20 @@
1241 1246 }
1242 1247 return ret;
1243 1248 }
  1249 +
  1250 +#ifdef CONFIG_HOTPLUG_CPU
  1251 +int __cpu_disable(void)
  1252 +{
  1253 + printk(KERN_ERR "SMP: __cpu_disable() on cpu %d\n",
  1254 + smp_processor_id());
  1255 + return -ENODEV;
  1256 +}
  1257 +
  1258 +void __cpu_die(unsigned int cpu)
  1259 +{
  1260 + printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu);
  1261 +}
  1262 +#endif
1244 1263  
1245 1264 void __init smp_cpus_done(unsigned int max_cpus)
1246 1265 {
arch/sparc64/kernel/sparc64_ksyms.c
... ... @@ -124,10 +124,6 @@
124 124 EXPORT_SYMBOL(__write_unlock);
125 125 EXPORT_SYMBOL(__write_trylock);
126 126  
127   -/* CPU online map and active count. */
128   -EXPORT_SYMBOL(cpu_online_map);
129   -EXPORT_SYMBOL(phys_cpu_present_map);
130   -
131 127 EXPORT_SYMBOL(smp_call_function);
132 128 #endif /* CONFIG_SMP */
133 129  
arch/sparc64/prom/misc.c
... ... @@ -96,6 +96,10 @@
96 96 */
97 97 void prom_halt(void)
98 98 {
  99 +#ifdef CONFIG_SUN_LDOMS
  100 + if (ldom_domaining_enabled)
  101 + ldom_power_off();
  102 +#endif
99 103 again:
100 104 p1275_cmd("exit", P1275_INOUT(0, 0));
101 105 goto again; /* PROM is out to get me -DaveM */
... ... @@ -103,6 +107,10 @@
103 107  
104 108 void prom_halt_power_off(void)
105 109 {
  110 +#ifdef CONFIG_SUN_LDOMS
  111 + if (ldom_domaining_enabled)
  112 + ldom_power_off();
  113 +#endif
106 114 p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
107 115  
108 116 /* if nothing else helps, we just halt */
arch/sparc64/prom/p1275.c
... ... @@ -16,6 +16,7 @@
16 16 #include <asm/system.h>
17 17 #include <asm/spitfire.h>
18 18 #include <asm/pstate.h>
  19 +#include <asm/ldc.h>
19 20  
20 21 struct {
21 22 long prom_callback; /* 0x00 */
include/asm-sparc64/cpudata.h
... ... @@ -80,7 +80,8 @@
80 80 unsigned int dev_mondo_qmask;
81 81 unsigned int resum_qmask;
82 82 unsigned int nonresum_qmask;
83   - unsigned int __pad2[3];
  83 + unsigned int __pad2[1];
  84 + void *hdesc;
84 85 } __attribute__((aligned(64)));
85 86 extern struct trap_per_cpu trap_block[NR_CPUS];
86 87 extern void init_cur_cpu_trap(struct thread_info *);
include/asm-sparc64/hvtramp.h
  1 +#ifndef _SPARC64_HVTRAP_H
  2 +#define _SPARC64_HVTRAP_H
  3 +
  4 +#ifndef __ASSEMBLY__
  5 +
  6 +#include <linux/types.h>
  7 +
  8 +struct hvtramp_mapping {
  9 + __u64 vaddr;
  10 + __u64 tte;
  11 +};
  12 +
  13 +struct hvtramp_descr {
  14 + __u32 cpu;
  15 + __u32 num_mappings;
  16 + __u64 fault_info_va;
  17 + __u64 fault_info_pa;
  18 + __u64 thread_reg;
  19 + struct hvtramp_mapping maps[2];
  20 +};
  21 +
  22 +extern void hv_cpu_startup(unsigned long hvdescr_pa);
  23 +
  24 +#endif
  25 +
  26 +#define HVTRAMP_DESCR_CPU 0x00
  27 +#define HVTRAMP_DESCR_NUM_MAPPINGS 0x04
  28 +#define HVTRAMP_DESCR_FAULT_INFO_VA 0x08
  29 +#define HVTRAMP_DESCR_FAULT_INFO_PA 0x10
  30 +#define HVTRAMP_DESCR_THREAD_REG 0x18
  31 +#define HVTRAMP_DESCR_MAPS 0x20
  32 +
  33 +#define HVTRAMP_MAPPING_VADDR 0x00
  34 +#define HVTRAMP_MAPPING_TTE 0x08
  35 +#define HVTRAMP_MAPPING_SIZE 0x10
  36 +
  37 +#endif /* _SPARC64_HVTRAP_H */
include/asm-sparc64/hypervisor.h
... ... @@ -98,7 +98,7 @@
98 98 #define HV_FAST_MACH_EXIT 0x00
99 99  
100 100 #ifndef __ASSEMBLY__
101   -extern void sun4v_mach_exit(unsigned long exit_core);
  101 +extern void sun4v_mach_exit(unsigned long exit_code);
102 102 #endif
103 103  
104 104 /* Domain services. */
include/asm-sparc64/ldc.h
... ... @@ -6,6 +6,8 @@
6 6 extern int ldom_domaining_enabled;
7 7 extern void ldom_set_var(const char *var, const char *value);
8 8 extern void ldom_reboot(const char *boot_command);
  9 +extern void ldom_power_off(void);
  10 +extern void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg);
9 11  
10 12 /* The event handler will be evoked when link state changes
11 13 * or data becomes available on the receive side.
include/asm-sparc64/mdesc.h
... ... @@ -2,6 +2,7 @@
2 2 #define _SPARC64_MDESC_H
3 3  
4 4 #include <linux/types.h>
  5 +#include <linux/cpumask.h>
5 6 #include <asm/prom.h>
6 7  
7 8 struct mdesc_handle;
... ... @@ -59,6 +60,8 @@
59 60 extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc);
60 61  
61 62 extern void mdesc_update(void);
  63 +
  64 +extern void mdesc_fill_in_cpu_data(cpumask_t mask);
62 65  
63 66 extern void sun4v_mdesc_init(void);
64 67  
include/asm-sparc64/smp.h
... ... @@ -29,9 +29,6 @@
29 29 #include <asm/bitops.h>
30 30 #include <asm/atomic.h>
31 31  
32   -extern cpumask_t phys_cpu_present_map;
33   -#define cpu_possible_map phys_cpu_present_map
34   -
35 32 extern cpumask_t cpu_sibling_map[NR_CPUS];
36 33 extern cpumask_t cpu_core_map[NR_CPUS];
37 34 extern int sparc64_multi_core;
... ... @@ -45,6 +42,11 @@
45 42  
46 43 extern void smp_fill_in_sib_core_maps(void);
47 44 extern unsigned char boot_cpu_id;
  45 +
  46 +#ifdef CONFIG_HOTPLUG_CPU
  47 +extern int __cpu_disable(void);
  48 +extern void __cpu_die(unsigned int cpu);
  49 +#endif
48 50  
49 51 #endif /* !(__ASSEMBLY__) */
50 52