Commit dfd4d47e9a71c5a35eb67a44cd311efbe1846b7e

Authored by Scott Wood
Committed by Avi Kivity
1 parent b59049720d

KVM: PPC: booke: Improve timer register emulation

Decrementers are now properly driven by TCR/TSR, and the guest
has full read/write access to these registers.

The decrementer keeps ticking (and setting the TSR bit) regardless of
whether the interrupts are enabled with TCR.

The decrementer stops at zero, rather than going negative.

Decrementers (and FITs, once implemented) are delivered as
level-triggered interrupts -- dequeued when the TSR bit is cleared, not
on delivery.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
[scottwood@freescale.com: significant changes]
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>

Showing 8 changed files with 115 additions and 70 deletions Side-by-side Diff

arch/powerpc/include/asm/kvm_host.h
... ... @@ -330,7 +330,7 @@
330 330 u32 tbl;
331 331 u32 tbu;
332 332 u32 tcr;
333   - u32 tsr;
  333 + ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
334 334 u32 ivor[64];
335 335 ulong ivpr;
336 336 u32 pvr;
arch/powerpc/include/asm/kvm_ppc.h
... ... @@ -66,6 +66,7 @@
66 66 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
67 67 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
68 68 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
  69 +extern void kvmppc_decrementer_func(unsigned long data);
69 70 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
70 71  
71 72 /* Core-specific hooks */
arch/powerpc/kvm/book3s.c
... ... @@ -515,4 +515,12 @@
515 515 mutex_unlock(&kvm->slots_lock);
516 516 return r;
517 517 }
  518 +
  519 +void kvmppc_decrementer_func(unsigned long data)
  520 +{
  521 + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
  522 +
  523 + kvmppc_core_queue_dec(vcpu);
  524 + kvm_vcpu_kick(vcpu);
  525 +}
arch/powerpc/kvm/booke.c
... ... @@ -252,9 +252,11 @@
252 252 allowed = vcpu->arch.shared->msr & MSR_ME;
253 253 msr_mask = 0;
254 254 break;
255   - case BOOKE_IRQPRIO_EXTERNAL:
256 255 case BOOKE_IRQPRIO_DECREMENTER:
257 256 case BOOKE_IRQPRIO_FIT:
  257 + keep_irq = true;
  258 + /* fall through */
  259 + case BOOKE_IRQPRIO_EXTERNAL:
258 260 allowed = vcpu->arch.shared->msr & MSR_EE;
259 261 allowed = allowed && !crit;
260 262 msr_mask = MSR_CE|MSR_ME|MSR_DE;
261 263  
... ... @@ -282,11 +284,26 @@
282 284 return allowed;
283 285 }
284 286  
  287 +static void update_timer_ints(struct kvm_vcpu *vcpu)
  288 +{
  289 + if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
  290 + kvmppc_core_queue_dec(vcpu);
  291 + else
  292 + kvmppc_core_dequeue_dec(vcpu);
  293 +}
  294 +
285 295 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
286 296 {
287 297 unsigned long *pending = &vcpu->arch.pending_exceptions;
288 298 unsigned int priority;
289 299  
  300 + if (vcpu->requests) {
  301 + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
  302 + smp_mb();
  303 + update_timer_ints(vcpu);
  304 + }
  305 + }
  306 +
290 307 priority = __ffs(*pending);
291 308 while (priority <= BOOKE_IRQPRIO_MAX) {
292 309 if (kvmppc_booke_irqprio_deliver(vcpu, priority))
293 310  
294 311  
295 312  
296 313  
... ... @@ -749,25 +766,16 @@
749 766 vcpu->arch.shared->esr = sregs->u.e.esr;
750 767 vcpu->arch.shared->dar = sregs->u.e.dear;
751 768 vcpu->arch.vrsave = sregs->u.e.vrsave;
752   - vcpu->arch.tcr = sregs->u.e.tcr;
  769 + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
753 770  
754   - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
  771 + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
755 772 vcpu->arch.dec = sregs->u.e.dec;
  773 + kvmppc_emulate_dec(vcpu);
  774 + }
756 775  
757   - kvmppc_emulate_dec(vcpu);
758   -
759 776 if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
760   - /*
761   - * FIXME: existing KVM timer handling is incomplete.
762   - * TSR cannot be read by the guest, and its value in
763   - * vcpu->arch is always zero. For now, just handle
764   - * the case where the caller is trying to inject a
765   - * decrementer interrupt.
766   - */
767   -
768   - if ((sregs->u.e.tsr & TSR_DIS) &&
769   - (vcpu->arch.tcr & TCR_DIE))
770   - kvmppc_core_queue_dec(vcpu);
  777 + vcpu->arch.tsr = sregs->u.e.tsr;
  778 + update_timer_ints(vcpu);
771 779 }
772 780  
773 781 return 0;
... ... @@ -921,6 +929,33 @@
921 929  
922 930 void kvmppc_core_destroy_vm(struct kvm *kvm)
923 931 {
  932 +}
  933 +
  934 +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
  935 +{
  936 + vcpu->arch.tcr = new_tcr;
  937 + update_timer_ints(vcpu);
  938 +}
  939 +
  940 +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
  941 +{
  942 + set_bits(tsr_bits, &vcpu->arch.tsr);
  943 + smp_wmb();
  944 + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
  945 + kvm_vcpu_kick(vcpu);
  946 +}
  947 +
  948 +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
  949 +{
  950 + clear_bits(tsr_bits, &vcpu->arch.tsr);
  951 + update_timer_ints(vcpu);
  952 +}
  953 +
  954 +void kvmppc_decrementer_func(unsigned long data)
  955 +{
  956 + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
  957 +
  958 + kvmppc_set_tsr_bits(vcpu, TSR_DIS);
924 959 }
925 960  
926 961 int __init kvmppc_booke_init(void)
arch/powerpc/kvm/booke.h
... ... @@ -55,6 +55,10 @@
55 55 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
56 56 void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
57 57  
  58 +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
  59 +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
  60 +void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
  61 +
58 62 int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
59 63 unsigned int inst, int *advance);
60 64 int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
arch/powerpc/kvm/booke_emulate.c
... ... @@ -13,6 +13,7 @@
13 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 14 *
15 15 * Copyright IBM Corp. 2008
  16 + * Copyright 2011 Freescale Semiconductor, Inc.
16 17 *
17 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 19 */
19 20  
... ... @@ -115,10 +116,10 @@
115 116 case SPRN_DBSR:
116 117 vcpu->arch.dbsr &= ~spr_val; break;
117 118 case SPRN_TSR:
118   - vcpu->arch.tsr &= ~spr_val; break;
  119 + kvmppc_clr_tsr_bits(vcpu, spr_val);
  120 + break;
119 121 case SPRN_TCR:
120   - vcpu->arch.tcr = spr_val;
121   - kvmppc_emulate_dec(vcpu);
  122 + kvmppc_set_tcr(vcpu, spr_val);
122 123 break;
123 124  
124 125 /* Note: SPRG4-7 are user-readable. These values are
... ... @@ -209,6 +210,10 @@
209 210 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
210 211 case SPRN_DBSR:
211 212 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
  213 + case SPRN_TSR:
  214 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break;
  215 + case SPRN_TCR:
  216 + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break;
212 217  
213 218 case SPRN_IVOR0:
214 219 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
arch/powerpc/kvm/emulate.c
... ... @@ -13,6 +13,7 @@
13 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 14 *
15 15 * Copyright IBM Corp. 2007
  16 + * Copyright 2011 Freescale Semiconductor, Inc.
16 17 *
17 18 * Authors: Hollis Blanchard <hollisb@us.ibm.com>
18 19 */
19 20  
20 21  
21 22  
22 23  
23 24  
... ... @@ -69,57 +70,55 @@
69 70 #define OP_STH 44
70 71 #define OP_STHU 45
71 72  
72   -#ifdef CONFIG_PPC_BOOK3S
73   -static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
74   -{
75   - return 1;
76   -}
77   -#else
78   -static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
79   -{
80   - /* On BOOKE, DEC = 0 is as good as decrementer not enabled */
81   - return (vcpu->arch.tcr & TCR_DIE) && vcpu->arch.dec;
82   -}
83   -#endif
84   -
85 73 void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
86 74 {
87 75 unsigned long dec_nsec;
88 76 unsigned long long dec_time;
89 77  
90 78 pr_debug("mtDEC: %x\n", vcpu->arch.dec);
  79 + hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
  80 +
91 81 #ifdef CONFIG_PPC_BOOK3S
92 82 /* mtdec lowers the interrupt line when positive. */
93 83 kvmppc_core_dequeue_dec(vcpu);
94 84  
95 85 /* POWER4+ triggers a dec interrupt if the value is < 0 */
96 86 if (vcpu->arch.dec & 0x80000000) {
97   - hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
98 87 kvmppc_core_queue_dec(vcpu);
99 88 return;
100 89 }
101 90 #endif
102   - if (kvmppc_dec_enabled(vcpu)) {
103   - /* The decrementer ticks at the same rate as the timebase, so
104   - * that's how we convert the guest DEC value to the number of
105   - * host ticks. */
106 91  
107   - hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
108   - dec_time = vcpu->arch.dec;
109   - dec_time *= 1000;
110   - do_div(dec_time, tb_ticks_per_usec);
111   - dec_nsec = do_div(dec_time, NSEC_PER_SEC);
112   - hrtimer_start(&vcpu->arch.dec_timer,
113   - ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
114   - vcpu->arch.dec_jiffies = get_tb();
115   - } else {
116   - hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
117   - }
  92 +#ifdef CONFIG_BOOKE
  93 + /* On BOOKE, DEC = 0 is as good as decrementer not enabled */
  94 + if (vcpu->arch.dec == 0)
  95 + return;
  96 +#endif
  97 +
  98 + /*
  99 + * The decrementer ticks at the same rate as the timebase, so
  100 + * that's how we convert the guest DEC value to the number of
  101 + * host ticks.
  102 + */
  103 +
  104 + dec_time = vcpu->arch.dec;
  105 + dec_time *= 1000;
  106 + do_div(dec_time, tb_ticks_per_usec);
  107 + dec_nsec = do_div(dec_time, NSEC_PER_SEC);
  108 + hrtimer_start(&vcpu->arch.dec_timer,
  109 + ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
  110 + vcpu->arch.dec_jiffies = get_tb();
118 111 }
119 112  
120 113 u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
121 114 {
122 115 u64 jd = tb - vcpu->arch.dec_jiffies;
  116 +
  117 +#ifdef CONFIG_BOOKE
  118 + if (vcpu->arch.dec < jd)
  119 + return 0;
  120 +#endif
  121 +
123 122 return vcpu->arch.dec - jd;
124 123 }
125 124  
arch/powerpc/kvm/powerpc.c
... ... @@ -39,7 +39,8 @@
39 39 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
40 40 {
41 41 return !(v->arch.shared->msr & MSR_WE) ||
42   - !!(v->arch.pending_exceptions);
  42 + !!(v->arch.pending_exceptions) ||
  43 + v->requests;
43 44 }
44 45  
45 46 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
... ... @@ -311,18 +312,6 @@
311 312 return kvmppc_core_pending_dec(vcpu);
312 313 }
313 314  
314   -static void kvmppc_decrementer_func(unsigned long data)
315   -{
316   - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
317   -
318   - kvmppc_core_queue_dec(vcpu);
319   -
320   - if (waitqueue_active(vcpu->arch.wqp)) {
321   - wake_up_interruptible(vcpu->arch.wqp);
322   - vcpu->stat.halt_wakeup++;
323   - }
324   -}
325   -
326 315 /*
327 316 * low level hrtimer wake routine. Because this runs in hardirq context
328 317 * we schedule a tasklet to do the real work.
... ... @@ -567,6 +556,16 @@
567 556 return r;
568 557 }
569 558  
  559 +void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
  560 +{
  560 + if (waitqueue_active(vcpu->arch.wqp)) {
  562 + wake_up_interruptible(vcpu->arch.wqp);
  563 + vcpu->stat.halt_wakeup++;
  564 + } else if (vcpu->cpu != -1) {
  565 + smp_send_reschedule(vcpu->cpu);
  566 + }
  567 +}
  568 +
570 569 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
571 570 {
572 571 if (irq->irq == KVM_INTERRUPT_UNSET) {
... ... @@ -575,13 +574,7 @@
575 574 }
576 575  
577 576 kvmppc_core_queue_external(vcpu, irq);
578   -
579   - if (waitqueue_active(vcpu->arch.wqp)) {
580   - wake_up_interruptible(vcpu->arch.wqp);
581   - vcpu->stat.halt_wakeup++;
582   - } else if (vcpu->cpu != -1) {
583   - smp_send_reschedule(vcpu->cpu);
584   - }
  577 + kvm_vcpu_kick(vcpu);
585 578  
586 579 return 0;
587 580 }