Commit 104452f052dfcf62dbf0c4110c9234a3285f59bf

Authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Avi Kivity:
 "Fixing a scheduling-while-atomic bug in the ppc code, and a bug which
  allowed pci bridges to be assigned to guests."

* git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: Book3S HV: Drop locks around call to kvmppc_pin_guest_page
  KVM: Fix PCI header check on device assignment

Showing 2 changed files (side-by-side diff)

arch/powerpc/kvm/book3s_hv.c
... ... @@ -268,25 +268,46 @@
268 268 return err;
269 269 }
270 270  
271   -static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap)
  271 +static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
272 272 {
  273 + struct kvm *kvm = vcpu->kvm;
273 274 void *va;
274 275 unsigned long nb;
  276 + unsigned long gpa;
275 277  
276   - vpap->update_pending = 0;
277   - va = NULL;
278   - if (vpap->next_gpa) {
279   - va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
280   - if (nb < vpap->len) {
281   - /*
282   - * If it's now too short, it must be that userspace
283   - * has changed the mappings underlying guest memory,
284   - * so unregister the region.
285   - */
  278 + /*
  279 + * We need to pin the page pointed to by vpap->next_gpa,
  280 + * but we can't call kvmppc_pin_guest_page under the lock
  281 + * as it does get_user_pages() and down_read(). So we
  282 + * have to drop the lock, pin the page, then get the lock
  283 + * again and check that a new area didn't get registered
  284 + * in the meantime.
  285 + */
  286 + for (;;) {
  287 + gpa = vpap->next_gpa;
  288 + spin_unlock(&vcpu->arch.vpa_update_lock);
  289 + va = NULL;
  290 + nb = 0;
  291 + if (gpa)
  292 + va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
  293 + spin_lock(&vcpu->arch.vpa_update_lock);
  294 + if (gpa == vpap->next_gpa)
  295 + break;
  296 + /* sigh... unpin that one and try again */
  297 + if (va)
286 298 kvmppc_unpin_guest_page(kvm, va);
287   - va = NULL;
288   - }
289 299 }
  300 +
  301 + vpap->update_pending = 0;
  302 + if (va && nb < vpap->len) {
  303 + /*
  304 + * If it's now too short, it must be that userspace
  305 + * has changed the mappings underlying guest memory,
  306 + * so unregister the region.
  307 + */
  308 + kvmppc_unpin_guest_page(kvm, va);
  309 + va = NULL;
  310 + }
290 311 if (vpap->pinned_addr)
291 312 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
292 313 vpap->pinned_addr = va;
293 314  
294 315  
295 316  
... ... @@ -296,20 +317,18 @@
296 317  
297 318 static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
298 319 {
299   - struct kvm *kvm = vcpu->kvm;
300   -
301 320 spin_lock(&vcpu->arch.vpa_update_lock);
302 321 if (vcpu->arch.vpa.update_pending) {
303   - kvmppc_update_vpa(kvm, &vcpu->arch.vpa);
  322 + kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
304 323 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
305 324 }
306 325 if (vcpu->arch.dtl.update_pending) {
307   - kvmppc_update_vpa(kvm, &vcpu->arch.dtl);
  326 + kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
308 327 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
309 328 vcpu->arch.dtl_index = 0;
310 329 }
311 330 if (vcpu->arch.slb_shadow.update_pending)
312   - kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow);
  331 + kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
313 332 spin_unlock(&vcpu->arch.vpa_update_lock);
314 333 }
315 334  
316 335  
317 336  
318 337  
... ... @@ -800,14 +819,41 @@
800 819 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
801 820 long ret;
802 821 u64 now;
803   - int ptid, i;
  822 + int ptid, i, need_vpa_update;
804 823  
805 824 /* don't start if any threads have a signal pending */
806   - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
  825 + need_vpa_update = 0;
  826 + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
807 827 if (signal_pending(vcpu->arch.run_task))
808 828 return 0;
  829 + need_vpa_update |= vcpu->arch.vpa.update_pending |
  830 + vcpu->arch.slb_shadow.update_pending |
  831 + vcpu->arch.dtl.update_pending;
  832 + }
809 833  
810 834 /*
  835 + * Initialize *vc, in particular vc->vcore_state, so we can
  836 + * drop the vcore lock if necessary.
  837 + */
  838 + vc->n_woken = 0;
  839 + vc->nap_count = 0;
  840 + vc->entry_exit_count = 0;
  841 + vc->vcore_state = VCORE_RUNNING;
  842 + vc->in_guest = 0;
  843 + vc->napping_threads = 0;
  844 +
  845 + /*
  846 + * Updating any of the vpas requires calling kvmppc_pin_guest_page,
  847 + * which can't be called with any spinlocks held.
  848 + */
  849 + if (need_vpa_update) {
  850 + spin_unlock(&vc->lock);
  851 + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
  852 + kvmppc_update_vpas(vcpu);
  853 + spin_lock(&vc->lock);
  854 + }
  855 +
  856 + /*
811 857 * Make sure we are running on thread 0, and that
812 858 * secondary threads are offline.
813 859 * XXX we should also block attempts to bring any
814 860  
815 861  
816 862  
... ... @@ -838,20 +884,10 @@
838 884 if (vcpu->arch.ceded)
839 885 vcpu->arch.ptid = ptid++;
840 886  
841   - vc->n_woken = 0;
842   - vc->nap_count = 0;
843   - vc->entry_exit_count = 0;
844   - vc->vcore_state = VCORE_RUNNING;
845 887 vc->stolen_tb += mftb() - vc->preempt_tb;
846   - vc->in_guest = 0;
847 888 vc->pcpu = smp_processor_id();
848   - vc->napping_threads = 0;
849 889 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
850 890 kvmppc_start_thread(vcpu);
851   - if (vcpu->arch.vpa.update_pending ||
852   - vcpu->arch.slb_shadow.update_pending ||
853   - vcpu->arch.dtl.update_pending)
854   - kvmppc_update_vpas(vcpu);
855 891 kvmppc_create_dtl_entry(vcpu, vc);
856 892 }
857 893 /* Grab any remaining hw threads so they can't go into the kernel */
virt/kvm/assigned-dev.c
... ... @@ -635,7 +635,6 @@
635 635 int r = 0, idx;
636 636 struct kvm_assigned_dev_kernel *match;
637 637 struct pci_dev *dev;
638   - u8 header_type;
639 638  
640 639 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
641 640 return -EINVAL;
... ... @@ -668,8 +667,7 @@
668 667 }
669 668  
670 669 /* Don't allow bridges to be assigned */
671   - pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
672   - if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
  670 + if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
673 671 r = -EPERM;
674 672 goto out_put;
675 673 }