Commit 4b4b4512da2a844b8da2585609b67fae1ce4f4db

Authored by Christoffer Dall
1 parent 4cf1bc4c7c

arm/arm64: KVM: Rework the arch timer to use level-triggered semantics

The arch timer currently uses edge-triggered semantics in the sense that
the line is never sampled by the vgic and lowering the line from the
timer to the vgic doesn't have any effect on the pending state of
virtual interrupts in the vgic.  This means that we do not support a
guest with the otherwise valid behavior of (1) disable interrupts (2)
enable the timer (3) disable the timer (4) enable interrupts.  Such a
guest would validly not expect to see any interrupts on real hardware,
but will see interrupts on KVM.

This patch fixes this shortcoming through the following series of
changes.

First, we change the flow of the timer/vgic sync/flush operations.  Now
the timer is always flushed/synced before the vgic, because the vgic
samples the state of the timer output.  This has the implication that we
move the timer operations in to non-preempible sections, but that is
fine after the previous commit getting rid of hrtimer schedules on every
entry/exit.

Second, we change the internal behavior of the timer, letting the timer
keep track of its previous output state, and only lower/raise the line
to the vgic when the state changes.  Note that in theory this could have
been accomplished more simply by signalling the vgic every time the
state *potentially* changed, but we don't want to be hitting the vgic
more often than necessary.

Third, we get rid of the use of the map->active field in the vgic and
instead simply set the interrupt as active on the physical distributor
whenever the input to the GIC is asserted and conversely clear the
physical active state when the input to the GIC is deasserted.

Fourth, and finally, we now initialize the timer PPIs (and all the other
unused PPIs for now), to be level-triggered, and modify the sync code to
sample the line state on HW sync and re-inject a new interrupt if it is
still pending at that time.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>

Showing 5 changed files with 91 additions and 108 deletions Side-by-side Diff

... ... @@ -561,9 +561,9 @@
561 561  
562 562 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
563 563 local_irq_enable();
  564 + kvm_timer_sync_hwstate(vcpu);
564 565 kvm_vgic_sync_hwstate(vcpu);
565 566 preempt_enable();
566   - kvm_timer_sync_hwstate(vcpu);
567 567 continue;
568 568 }
569 569  
570 570  
... ... @@ -608,11 +608,16 @@
608 608 kvm_guest_exit();
609 609 trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
610 610  
  611 + /*
  612 + * We must sync the timer state before the vgic state so that
  613 + * the vgic can properly sample the updated state of the
  614 + * interrupt line.
  615 + */
  616 + kvm_timer_sync_hwstate(vcpu);
  617 +
611 618 kvm_vgic_sync_hwstate(vcpu);
612 619  
613 620 preempt_enable();
614   -
615   - kvm_timer_sync_hwstate(vcpu);
616 621  
617 622 ret = handle_exit(vcpu, run, ret);
618 623 }
include/kvm/arm_arch_timer.h
... ... @@ -51,7 +51,7 @@
51 51 bool armed;
52 52  
53 53 /* Timer IRQ */
54   - const struct kvm_irq_level *irq;
  54 + struct kvm_irq_level irq;
55 55  
56 56 /* VGIC mapping */
57 57 struct irq_phys_map *map;
include/kvm/arm_vgic.h
... ... @@ -159,7 +159,6 @@
159 159 u32 virt_irq;
160 160 u32 phys_irq;
161 161 u32 irq;
162   - bool active;
163 162 };
164 163  
165 164 struct irq_phys_map_entry {
... ... @@ -354,8 +353,6 @@
354 353 struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
355 354 int virt_irq, int irq);
356 355 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
357   -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
358   -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
359 356  
360 357 #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
361 358 #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
virt/kvm/arm/arch_timer.c
... ... @@ -59,18 +59,6 @@
59 59 }
60 60 }
61 61  
62   -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
63   -{
64   - int ret;
65   - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
66   -
67   - kvm_vgic_set_phys_irq_active(timer->map, true);
68   - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
69   - timer->map,
70   - timer->irq->level);
71   - WARN_ON(ret);
72   -}
73   -
74 62 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
75 63 {
76 64 struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
... ... @@ -116,8 +104,7 @@
116 104 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
117 105  
118 106 return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
119   - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
120   - !kvm_vgic_get_phys_irq_active(timer->map);
  107 + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
121 108 }
122 109  
123 110 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
124 111  
... ... @@ -134,7 +121,42 @@
134 121 return cval <= now;
135 122 }
136 123  
  124 +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
  125 +{
  126 + int ret;
  127 + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
  128 +
  129 + BUG_ON(!vgic_initialized(vcpu->kvm));
  130 +
  131 + timer->irq.level = new_level;
  132 + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
  133 + timer->map,
  134 + timer->irq.level);
  135 + WARN_ON(ret);
  136 +}
  137 +
137 138 /*
  139 + * Check if there was a change in the timer state (should we raise or lower
  140 + * the line level to the GIC).
  141 + */
  142 +static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
  143 +{
  144 + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
  145 +
  146 + /*
  147 + * If userspace modified the timer registers via SET_ONE_REG before
  148 + * the vgic was initialized, we mustn't set the timer->irq.level value
  149 + * because the guest would never see the interrupt. Instead wait
  150 + * until we call this function from kvm_timer_flush_hwstate.
  151 + */
  152 + if (!vgic_initialized(vcpu->kvm))
  153 + return;
  154 +
  155 + if (kvm_timer_should_fire(vcpu) != timer->irq.level)
  156 + kvm_timer_update_irq(vcpu, !timer->irq.level);
  157 +}
  158 +
  159 +/*
138 160 * Schedule the background timer before calling kvm_vcpu_block, so that this
139 161 * thread is removed from its waitqueue and made runnable when there's a timer
140 162 * interrupt to handle.
141 163  
142 164  
... ... @@ -192,17 +214,20 @@
192 214 bool phys_active;
193 215 int ret;
194 216  
195   - if (kvm_timer_should_fire(vcpu))
196   - kvm_timer_inject_irq(vcpu);
  217 + kvm_timer_update_state(vcpu);
197 218  
198 219 /*
199   - * We keep track of whether the edge-triggered interrupt has been
200   - * signalled to the vgic/guest, and if so, we mask the interrupt and
201   - * the physical distributor to prevent the timer from raising a
202   - * physical interrupt whenever we run a guest, preventing forward
203   - * VCPU progress.
  220 + * If we enter the guest with the virtual input level to the VGIC
  221 + * asserted, then we have already told the VGIC what we need to, and
  222 + * we don't need to exit from the guest until the guest deactivates
  223 + * the already injected interrupt, so therefore we should set the
  224 + * hardware active state to prevent unnecessary exits from the guest.
  225 + *
  226 + * Conversely, if the virtual input level is deasserted, then always
  227 + * clear the hardware active state to ensure that hardware interrupts
  228 + * from the timer triggers a guest exit.
204 229 */
205   - if (kvm_vgic_get_phys_irq_active(timer->map))
  230 + if (timer->irq.level)
206 231 phys_active = true;
207 232 else
208 233 phys_active = false;
... ... @@ -226,8 +251,11 @@
226 251  
227 252 BUG_ON(timer_is_armed(timer));
228 253  
229   - if (kvm_timer_should_fire(vcpu))
230   - kvm_timer_inject_irq(vcpu);
  254 + /*
  255 + * The guest could have modified the timer registers or the timer
  256 + * could have expired, update the timer state.
  257 + */
  258 + kvm_timer_update_state(vcpu);
231 259 }
232 260  
233 261 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
... ... @@ -242,7 +270,7 @@
242 270 * kvm_vcpu_set_target(). To handle this, we determine
243 271 * vcpu timer irq number when the vcpu is reset.
244 272 */
245   - timer->irq = irq;
  273 + timer->irq.irq = irq->irq;
246 274  
247 275 /*
248 276 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
... ... @@ -251,6 +279,7 @@
251 279 * the ARMv7 architecture.
252 280 */
253 281 timer->cntv_ctl = 0;
  282 + kvm_timer_update_state(vcpu);
254 283  
255 284 /*
256 285 * Tell the VGIC that the virtual interrupt is tied to a
... ... @@ -295,6 +324,8 @@
295 324 default:
296 325 return -1;
297 326 }
  327 +
  328 + kvm_timer_update_state(vcpu);
298 329 return 0;
299 330 }
300 331  
... ... @@ -537,34 +537,6 @@
537 537 return false;
538 538 }
539 539  
540   -/*
541   - * If a mapped interrupt's state has been modified by the guest such that it
542   - * is no longer active or pending, without it have gone through the sync path,
543   - * then the map->active field must be cleared so the interrupt can be taken
544   - * again.
545   - */
546   -static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
547   -{
548   - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
549   - struct list_head *root;
550   - struct irq_phys_map_entry *entry;
551   - struct irq_phys_map *map;
552   -
553   - rcu_read_lock();
554   -
555   - /* Check for PPIs */
556   - root = &vgic_cpu->irq_phys_map_list;
557   - list_for_each_entry_rcu(entry, root, entry) {
558   - map = &entry->map;
559   -
560   - if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
561   - !vgic_irq_is_active(vcpu, map->virt_irq))
562   - map->active = false;
563   - }
564   -
565   - rcu_read_unlock();
566   -}
567   -
568 540 bool vgic_handle_clear_pending_reg(struct kvm *kvm,
569 541 struct kvm_exit_mmio *mmio,
570 542 phys_addr_t offset, int vcpu_id)
... ... @@ -595,7 +567,6 @@
595 567 vcpu_id, offset);
596 568 vgic_reg_access(mmio, reg, offset, mode);
597 569  
598   - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
599 570 vgic_update_state(kvm);
600 571 return true;
601 572 }
... ... @@ -633,7 +604,6 @@
633 604 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
634 605  
635 606 if (mmio->is_write) {
636   - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
637 607 vgic_update_state(kvm);
638 608 return true;
639 609 }
640 610  
641 611  
642 612  
643 613  
644 614  
645 615  
646 616  
... ... @@ -1443,29 +1413,37 @@
1443 1413 /*
1444 1414 * Save the physical active state, and reset it to inactive.
1445 1415 *
1446   - * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
  1416 + * Return true if there's a pending level triggered interrupt line to queue.
1447 1417 */
1448   -static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
  1418 +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
1449 1419 {
  1420 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1450 1421 struct irq_phys_map *map;
  1422 + bool phys_active;
  1423 + bool level_pending;
1451 1424 int ret;
1452 1425  
1453 1426 if (!(vlr.state & LR_HW))
1454   - return 0;
  1427 + return false;
1455 1428  
1456 1429 map = vgic_irq_map_search(vcpu, vlr.irq);
1457 1430 BUG_ON(!map);
1458 1431  
1459 1432 ret = irq_get_irqchip_state(map->irq,
1460 1433 IRQCHIP_STATE_ACTIVE,
1461   - &map->active);
  1434 + &phys_active);
1462 1435  
1463 1436 WARN_ON(ret);
1464 1437  
1465   - if (map->active)
  1438 + if (phys_active)
1466 1439 return 0;
1467 1440  
1468   - return 1;
  1441 + /* Mapped edge-triggered interrupts not yet supported. */
  1442 + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
  1443 + spin_lock(&dist->lock);
  1444 + level_pending = process_level_irq(vcpu, lr, vlr);
  1445 + spin_unlock(&dist->lock);
  1446 + return level_pending;
1469 1447 }
1470 1448  
1471 1449 /* Sync back the VGIC state after a guest run */
... ... @@ -1490,18 +1468,8 @@
1490 1468 continue;
1491 1469  
1492 1470 vlr = vgic_get_lr(vcpu, lr);
1493   - if (vgic_sync_hwirq(vcpu, vlr)) {
1494   - /*
1495   - * So this is a HW interrupt that the guest
1496   - * EOI-ed. Clean the LR state and allow the
1497   - * interrupt to be sampled again.
1498   - */
1499   - vlr.state = 0;
1500   - vlr.hwirq = 0;
1501   - vgic_set_lr(vcpu, lr, vlr);
1502   - vgic_irq_clear_queued(vcpu, vlr.irq);
1503   - set_bit(lr, elrsr_ptr);
1504   - }
  1471 + if (vgic_sync_hwirq(vcpu, lr, vlr))
  1472 + level_pending = true;
1505 1473  
1506 1474 if (!test_bit(lr, elrsr_ptr))
1507 1475 continue;
... ... @@ -1881,30 +1849,6 @@
1881 1849 }
1882 1850  
1883 1851 /**
1884   - * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
1885   - *
1886   - * Return the logical active state of a mapped interrupt. This doesn't
1887   - * necessarily reflects the current HW state.
1888   - */
1889   -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
1890   -{
1891   - BUG_ON(!map);
1892   - return map->active;
1893   -}
1894   -
1895   -/**
1896   - * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
1897   - *
1898   - * Set the logical active state of a mapped interrupt. This doesn't
1899   - * immediately affects the HW state.
1900   - */
1901   -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
1902   -{
1903   - BUG_ON(!map);
1904   - map->active = active;
1905   -}
1906   -
1907   -/**
1908 1852 * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1909 1853 * @vcpu: The VCPU pointer
1910 1854 * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
1911 1855  
1912 1856  
1913 1857  
... ... @@ -2129,17 +2073,23 @@
2129 2073 }
2130 2074  
2131 2075 /*
2132   - * Enable all SGIs and configure all private IRQs as
2133   - * edge-triggered.
  2076 + * Enable and configure all SGIs to be edge-triggere and
  2077 + * configure all PPIs as level-triggered.
2134 2078 */
2135 2079 for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
2136   - if (i < VGIC_NR_SGIS)
  2080 + if (i < VGIC_NR_SGIS) {
  2081 + /* SGIs */
2137 2082 vgic_bitmap_set_irq_val(&dist->irq_enabled,
2138 2083 vcpu->vcpu_id, i, 1);
2139   - if (i < VGIC_NR_PRIVATE_IRQS)
2140 2084 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2141 2085 vcpu->vcpu_id, i,
2142 2086 VGIC_CFG_EDGE);
  2087 + } else if (i < VGIC_NR_PRIVATE_IRQS) {
  2088 + /* PPIs */
  2089 + vgic_bitmap_set_irq_val(&dist->irq_cfg,
  2090 + vcpu->vcpu_id, i,
  2091 + VGIC_CFG_LEVEL);
  2092 + }
2143 2093 }
2144 2094  
2145 2095 vgic_enable(vcpu);