Commit 1a577b72475d161b6677c05abe57301362023bb2
Committed by
Marcelo Tosatti
1 parent
d63d3e6217
Exists in
master
and in
20 other branches
KVM: fix race with level interrupts
When more than 1 source id is in use for the same GSI, we have the following race related to handling irq_states race: CPU 0 clears bit 0. CPU 0 read irq_state as 0. CPU 1 sets level to 1. CPU 1 calls kvm_ioapic_set_irq(1). CPU 0 calls kvm_ioapic_set_irq(0). Now ioapic thinks the level is 0 but irq_state is not 0. Fix by performing all irq_states bitmap handling under pic/ioapic lock. This also removes the need for atomics with irq_states handling. Reported-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Showing 5 changed files with 51 additions and 35 deletions Side-by-side Diff
arch/x86/include/asm/kvm_host.h
... | ... | @@ -816,7 +816,20 @@ |
816 | 816 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
817 | 817 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
818 | 818 | |
819 | -int kvm_pic_set_irq(void *opaque, int irq, int level); | |
819 | +static inline int __kvm_irq_line_state(unsigned long *irq_state, | |
820 | + int irq_source_id, int level) | |
821 | +{ | |
822 | + /* Logical OR for level trig interrupt */ | |
823 | + if (level) | |
824 | + __set_bit(irq_source_id, irq_state); | |
825 | + else | |
826 | + __clear_bit(irq_source_id, irq_state); | |
827 | + | |
828 | + return !!(*irq_state); | |
829 | +} | |
830 | + | |
831 | +int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); | |
832 | +void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | |
820 | 833 | |
821 | 834 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
822 | 835 |
arch/x86/kvm/i8259.c
... | ... | @@ -188,14 +188,15 @@ |
188 | 188 | pic_unlock(s); |
189 | 189 | } |
190 | 190 | |
191 | -int kvm_pic_set_irq(void *opaque, int irq, int level) | |
191 | +int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) | |
192 | 192 | { |
193 | - struct kvm_pic *s = opaque; | |
194 | 193 | int ret = -1; |
195 | 194 | |
196 | 195 | pic_lock(s); |
197 | 196 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
198 | - ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | |
197 | + int irq_level = __kvm_irq_line_state(&s->irq_states[irq], | |
198 | + irq_source_id, level); | |
199 | + ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); | |
199 | 200 | pic_update_irq(s); |
200 | 201 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
201 | 202 | s->pics[irq >> 3].imr, ret == 0); |
... | ... | @@ -203,6 +204,16 @@ |
203 | 204 | pic_unlock(s); |
204 | 205 | |
205 | 206 | return ret; |
207 | +} | |
208 | + | |
209 | +void kvm_pic_clear_all(struct kvm_pic *s, int irq_source_id) | |
210 | +{ | |
211 | + int i; | |
212 | + | |
213 | + pic_lock(s); | |
214 | + for (i = 0; i < PIC_NUM_PINS; i++) | |
215 | + __clear_bit(irq_source_id, &s->irq_states[i]); | |
216 | + pic_unlock(s); | |
206 | 217 | } |
207 | 218 | |
208 | 219 | /* |
virt/kvm/ioapic.c
... | ... | @@ -191,7 +191,8 @@ |
191 | 191 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); |
192 | 192 | } |
193 | 193 | |
194 | -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |
194 | +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | |
195 | + int level) | |
195 | 196 | { |
196 | 197 | u32 old_irr; |
197 | 198 | u32 mask = 1 << irq; |
198 | 199 | |
... | ... | @@ -201,9 +202,11 @@ |
201 | 202 | spin_lock(&ioapic->lock); |
202 | 203 | old_irr = ioapic->irr; |
203 | 204 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
205 | + int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], | |
206 | + irq_source_id, level); | |
204 | 207 | entry = ioapic->redirtbl[irq]; |
205 | - level ^= entry.fields.polarity; | |
206 | - if (!level) | |
208 | + irq_level ^= entry.fields.polarity; | |
209 | + if (!irq_level) | |
207 | 210 | ioapic->irr &= ~mask; |
208 | 211 | else { |
209 | 212 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); |
... | ... | @@ -219,6 +222,16 @@ |
219 | 222 | spin_unlock(&ioapic->lock); |
220 | 223 | |
221 | 224 | return ret; |
225 | +} | |
226 | + | |
227 | +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) | |
228 | +{ | |
229 | + int i; | |
230 | + | |
231 | + spin_lock(&ioapic->lock); | |
232 | + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | |
233 | + __clear_bit(irq_source_id, &ioapic->irq_states[i]); | |
234 | + spin_unlock(&ioapic->lock); | |
222 | 235 | } |
223 | 236 | |
224 | 237 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, |
virt/kvm/ioapic.h
... | ... | @@ -74,7 +74,9 @@ |
74 | 74 | bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); |
75 | 75 | int kvm_ioapic_init(struct kvm *kvm); |
76 | 76 | void kvm_ioapic_destroy(struct kvm *kvm); |
77 | -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | |
77 | +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | |
78 | + int level); | |
79 | +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); | |
78 | 80 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
79 | 81 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
80 | 82 | struct kvm_lapic_irq *irq); |
virt/kvm/irq_comm.c
... | ... | @@ -33,26 +33,12 @@ |
33 | 33 | |
34 | 34 | #include "ioapic.h" |
35 | 35 | |
36 | -static inline int kvm_irq_line_state(unsigned long *irq_state, | |
37 | - int irq_source_id, int level) | |
38 | -{ | |
39 | - /* Logical OR for level trig interrupt */ | |
40 | - if (level) | |
41 | - set_bit(irq_source_id, irq_state); | |
42 | - else | |
43 | - clear_bit(irq_source_id, irq_state); | |
44 | - | |
45 | - return !!(*irq_state); | |
46 | -} | |
47 | - | |
48 | 36 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
49 | 37 | struct kvm *kvm, int irq_source_id, int level) |
50 | 38 | { |
51 | 39 | #ifdef CONFIG_X86 |
52 | 40 | struct kvm_pic *pic = pic_irqchip(kvm); |
53 | - level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], | |
54 | - irq_source_id, level); | |
55 | - return kvm_pic_set_irq(pic, e->irqchip.pin, level); | |
41 | + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); | |
56 | 42 | #else |
57 | 43 | return -1; |
58 | 44 | #endif |
... | ... | @@ -62,10 +48,7 @@ |
62 | 48 | struct kvm *kvm, int irq_source_id, int level) |
63 | 49 | { |
64 | 50 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
65 | - level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], | |
66 | - irq_source_id, level); | |
67 | - | |
68 | - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); | |
51 | + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); | |
69 | 52 | } |
70 | 53 | |
71 | 54 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
... | ... | @@ -249,8 +232,6 @@ |
249 | 232 | |
250 | 233 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) |
251 | 234 | { |
252 | - int i; | |
253 | - | |
254 | 235 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
255 | 236 | |
256 | 237 | mutex_lock(&kvm->irq_lock); |
257 | 238 | |
258 | 239 | |
... | ... | @@ -263,14 +244,10 @@ |
263 | 244 | if (!irqchip_in_kernel(kvm)) |
264 | 245 | goto unlock; |
265 | 246 | |
266 | - for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { | |
267 | - clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); | |
268 | - if (i >= 16) | |
269 | - continue; | |
247 | + kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); | |
270 | 248 | #ifdef CONFIG_X86 |
271 | - clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); | |
249 | + kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); | |
272 | 250 | #endif |
273 | - } | |
274 | 251 | unlock: |
275 | 252 | mutex_unlock(&kvm->irq_lock); |
276 | 253 | } |