Commit 3273cba1956437820ae25d98e3ae57d1c094205c

Authored by Linus Torvalds

Merge tag 'for-linus-4.4-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen bug fixes from David Vrabel:
 - XSA-155 security fixes to backend drivers: requests were fetched more than once from rings shared with the frontend (a sketch of the bug class follows the shortlog).
 - XSA-157 security fixes to pciback: missing sanity checks in the MSI/MSI-X operations.

* tag 'for-linus-4.4-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen-pciback: fix up cleanup path when alloc fails
  xen/pciback: Don't allow MSI-X ops if PCI_COMMAND_MEMORY is not set.
  xen/pciback: For XEN_PCI_OP_disable_msi[|x] only disable if device has MSI(X) enabled.
  xen/pciback: Do not install an IRQ handler for MSI interrupts.
  xen/pciback: Return error on XEN_PCI_OP_enable_msix when device has MSI or MSI-X enabled
  xen/pciback: Return error on XEN_PCI_OP_enable_msi when device has MSI or MSI-X enabled
  xen/pciback: Save xen_pci_op commands before processing it
  xen-scsiback: safely copy requests
  xen-blkback: read from indirect descriptors only once
  xen-blkback: only read request operation from shared ring once
  xen-netback: use RING_COPY_REQUEST() throughout
  xen-netback: don't use last request to determine minimum Tx credit
  xen: Add RING_COPY_REQUEST()
  xen/x86/pvh: Use HVM's flush_tlb_others op
  xen: Resume PMU from non-atomic context
  xen/events/fifo: Consume unprocessed events when a CPU dies
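
The XSA-155 entries above all address one bug class: a backend that reads a request out of shared memory more than once can have the value changed by the frontend between the validation read and the use read. Below is a minimal userspace model of that pattern (none of these names come from the patched drivers); it only illustrates why the fixes copy the request, or READ_ONCE() individual fields, before checking them.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for a request that lives in a page shared with the frontend. */
struct demo_req {
        uint8_t first_sect;
        uint8_t last_sect;
};

#define DEMO_SECTS_PER_PAGE 8           /* arbitrary bound for the sketch */

/* Broken: each "shared->" access is a separate fetch, so the frontend can
 * change last_sect between the range check and the subtraction. */
static int parse_unsafe(volatile struct demo_req *shared)
{
        if (shared->last_sect >= DEMO_SECTS_PER_PAGE ||
            shared->last_sect < shared->first_sect)
                return -1;
        return shared->last_sect - shared->first_sect + 1;
}

/* Fixed: snapshot the request once, then check and use only the snapshot,
 * which is what RING_COPY_REQUEST()/READ_ONCE() achieve in the patches. */
static int parse_safe(volatile struct demo_req *shared)
{
        struct demo_req req = *shared;  /* single copy out of shared memory */

        if (req.last_sect >= DEMO_SECTS_PER_PAGE ||
            req.last_sect < req.first_sect)
                return -1;
        return req.last_sect - req.first_sect + 1;
}

int main(void)
{
        volatile struct demo_req shared = { .first_sect = 0, .last_sect = 7 };

        printf("unsafe: %d, safe: %d\n",
               parse_unsafe(&shared), parse_safe(&shared));
        return 0;
}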

11 changed files:

arch/x86/xen/mmu.c
... ... @@ -2495,14 +2495,9 @@
2495 2495 {
2496 2496 x86_init.paging.pagetable_init = xen_pagetable_init;
2497 2497  
2498   - /* Optimization - we can use the HVM one but it has no idea which
2499   - * VCPUs are descheduled - which means that it will needlessly IPI
2500   - * them. Xen knows so let it do the job.
2501   - */
2502   - if (xen_feature(XENFEAT_auto_translated_physmap)) {
2503   - pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
  2498 + if (xen_feature(XENFEAT_auto_translated_physmap))
2504 2499 return;
2505   - }
  2500 +
2506 2501 pv_mmu_ops = xen_mmu_ops;
2507 2502  
2508 2503 memset(dummy_mapping, 0xff, PAGE_SIZE);
arch/x86/xen/suspend.c
... ... @@ -68,26 +68,16 @@
68 68  
69 69 void xen_arch_pre_suspend(void)
70 70 {
71   - int cpu;
72   -
73   - for_each_online_cpu(cpu)
74   - xen_pmu_finish(cpu);
75   -
76 71 if (xen_pv_domain())
77 72 xen_pv_pre_suspend();
78 73 }
79 74  
80 75 void xen_arch_post_suspend(int cancelled)
81 76 {
82   - int cpu;
83   -
84 77 if (xen_pv_domain())
85 78 xen_pv_post_suspend(cancelled);
86 79 else
87 80 xen_hvm_post_suspend(cancelled);
88   -
89   - for_each_online_cpu(cpu)
90   - xen_pmu_init(cpu);
91 81 }
92 82  
93 83 static void xen_vcpu_notify_restore(void *data)
... ... @@ -106,11 +96,21 @@
106 96  
107 97 void xen_arch_resume(void)
108 98 {
  99 + int cpu;
  100 +
109 101 on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
  102 +
  103 + for_each_online_cpu(cpu)
  104 + xen_pmu_init(cpu);
110 105 }
111 106  
112 107 void xen_arch_suspend(void)
113 108 {
  109 + int cpu;
  110 +
  111 + for_each_online_cpu(cpu)
  112 + xen_pmu_finish(cpu);
  113 +
114 114 on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
115 115 }
drivers/block/xen-blkback/blkback.c
... ... @@ -950,6 +950,8 @@
950 950 goto unmap;
951 951  
952 952 for (n = 0, i = 0; n < nseg; n++) {
  953 + uint8_t first_sect, last_sect;
  954 +
953 955 if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
954 956 /* Map indirect segments */
955 957 if (segments)
956 958 kunmap_atomic(segments);
... ... @@ -957,15 +959,18 @@
957 959 segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
958 960 }
959 961 i = n % SEGS_PER_INDIRECT_FRAME;
  962 +
960 963 pending_req->segments[n]->gref = segments[i].gref;
961   - seg[n].nsec = segments[i].last_sect -
962   - segments[i].first_sect + 1;
963   - seg[n].offset = (segments[i].first_sect << 9);
964   - if ((segments[i].last_sect >= (XEN_PAGE_SIZE >> 9)) ||
965   - (segments[i].last_sect < segments[i].first_sect)) {
  964 +
  965 + first_sect = READ_ONCE(segments[i].first_sect);
  966 + last_sect = READ_ONCE(segments[i].last_sect);
  967 + if (last_sect >= (XEN_PAGE_SIZE >> 9) || last_sect < first_sect) {
966 968 rc = -EINVAL;
967 969 goto unmap;
968 970 }
  971 +
  972 + seg[n].nsec = last_sect - first_sect + 1;
  973 + seg[n].offset = first_sect << 9;
969 974 preq->nr_sects += seg[n].nsec;
970 975 }
971 976  
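
For reference, a compact userspace model of the read-once pattern in the blkback hunk above. DEMO_READ_ONCE is a simplified stand-in for the kernel's READ_ONCE(), and the struct is illustrative rather than blkif's; the point is that the value that passes the range check is the same value used for the length.

#include <stdint.h>

#define DEMO_READ_ONCE(x) (*(const volatile typeof(x) *)&(x))

struct demo_seg {                       /* illustrative, not blkif_request_segment */
        uint8_t first_sect;
        uint8_t last_sect;
};

#define DEMO_PAGE_SIZE 4096
#define DEMO_SECTS_PER_PAGE (DEMO_PAGE_SIZE >> 9)   /* 512-byte sectors per page */

/* Returns the segment length in sectors, or -1 if the shared descriptor is
 * out of range.  Both fields are fetched exactly once. */
static int demo_seg_nsec(struct demo_seg *shared)
{
        uint8_t first = DEMO_READ_ONCE(shared->first_sect);
        uint8_t last  = DEMO_READ_ONCE(shared->last_sect);

        if (last >= DEMO_SECTS_PER_PAGE || last < first)
                return -1;
        return last - first + 1;
}
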
drivers/block/xen-blkback/common.h
... ... @@ -408,8 +408,8 @@
408 408 struct blkif_x86_32_request *src)
409 409 {
410 410 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
411   - dst->operation = src->operation;
412   - switch (src->operation) {
  411 + dst->operation = READ_ONCE(src->operation);
  412 + switch (dst->operation) {
413 413 case BLKIF_OP_READ:
414 414 case BLKIF_OP_WRITE:
415 415 case BLKIF_OP_WRITE_BARRIER:
... ... @@ -456,8 +456,8 @@
456 456 struct blkif_x86_64_request *src)
457 457 {
458 458 int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
459   - dst->operation = src->operation;
460   - switch (src->operation) {
  459 + dst->operation = READ_ONCE(src->operation);
  460 + switch (dst->operation) {
461 461 case BLKIF_OP_READ:
462 462 case BLKIF_OP_WRITE:
463 463 case BLKIF_OP_WRITE_BARRIER:
drivers/net/xen-netback/netback.c
... ... @@ -258,18 +258,18 @@
258 258 struct netrx_pending_operations *npo)
259 259 {
260 260 struct xenvif_rx_meta *meta;
261   - struct xen_netif_rx_request *req;
  261 + struct xen_netif_rx_request req;
262 262  
263   - req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
  263 + RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
264 264  
265 265 meta = npo->meta + npo->meta_prod++;
266 266 meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
267 267 meta->gso_size = 0;
268 268 meta->size = 0;
269   - meta->id = req->id;
  269 + meta->id = req.id;
270 270  
271 271 npo->copy_off = 0;
272   - npo->copy_gref = req->gref;
  272 + npo->copy_gref = req.gref;
273 273  
274 274 return meta;
275 275 }
... ... @@ -424,7 +424,7 @@
424 424 struct xenvif *vif = netdev_priv(skb->dev);
425 425 int nr_frags = skb_shinfo(skb)->nr_frags;
426 426 int i;
427   - struct xen_netif_rx_request *req;
  427 + struct xen_netif_rx_request req;
428 428 struct xenvif_rx_meta *meta;
429 429 unsigned char *data;
430 430 int head = 1;
... ... @@ -443,15 +443,15 @@
443 443  
444 444 /* Set up a GSO prefix descriptor, if necessary */
445 445 if ((1 << gso_type) & vif->gso_prefix_mask) {
446   - req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
  446 + RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
447 447 meta = npo->meta + npo->meta_prod++;
448 448 meta->gso_type = gso_type;
449 449 meta->gso_size = skb_shinfo(skb)->gso_size;
450 450 meta->size = 0;
451   - meta->id = req->id;
  451 + meta->id = req.id;
452 452 }
453 453  
454   - req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons++);
  454 + RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
455 455 meta = npo->meta + npo->meta_prod++;
456 456  
457 457 if ((1 << gso_type) & vif->gso_mask) {
... ... @@ -463,9 +463,9 @@
463 463 }
464 464  
465 465 meta->size = 0;
466   - meta->id = req->id;
  466 + meta->id = req.id;
467 467 npo->copy_off = 0;
468   - npo->copy_gref = req->gref;
  468 + npo->copy_gref = req.gref;
469 469  
470 470 data = skb->data;
471 471 while (data < skb_tail_pointer(skb)) {
... ... @@ -679,9 +679,7 @@
679 679 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
680 680 * Otherwise the interface can seize up due to insufficient credit.
681 681 */
682   - max_burst = RING_GET_REQUEST(&queue->tx, queue->tx.req_cons)->size;
683   - max_burst = min(max_burst, 131072UL);
684   - max_burst = max(max_burst, queue->credit_bytes);
  682 + max_burst = max(131072UL, queue->credit_bytes);
685 683  
686 684 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
687 685 max_credit = queue->remaining_credit + queue->credit_bytes;
... ... @@ -711,7 +709,7 @@
711 709 spin_unlock_irqrestore(&queue->response_lock, flags);
712 710 if (cons == end)
713 711 break;
714   - txp = RING_GET_REQUEST(&queue->tx, cons++);
  712 + RING_COPY_REQUEST(&queue->tx, cons++, txp);
715 713 } while (1);
716 714 queue->tx.req_cons = cons;
717 715 }
... ... @@ -778,8 +776,7 @@
778 776 if (drop_err)
779 777 txp = &dropped_tx;
780 778  
781   - memcpy(txp, RING_GET_REQUEST(&queue->tx, cons + slots),
782   - sizeof(*txp));
  779 + RING_COPY_REQUEST(&queue->tx, cons + slots, txp);
783 780  
784 781 /* If the guest submitted a frame >= 64 KiB then
785 782 * first->size overflowed and following slots will
... ... @@ -1112,8 +1109,7 @@
1112 1109 return -EBADR;
1113 1110 }
1114 1111  
1115   - memcpy(&extra, RING_GET_REQUEST(&queue->tx, cons),
1116   - sizeof(extra));
  1112 + RING_COPY_REQUEST(&queue->tx, cons, &extra);
1117 1113 if (unlikely(!extra.type ||
1118 1114 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1119 1115 queue->tx.req_cons = ++cons;
... ... @@ -1322,7 +1318,7 @@
1322 1318  
1323 1319 idx = queue->tx.req_cons;
1324 1320 rmb(); /* Ensure that we see the request before we copy it. */
1325   - memcpy(&txreq, RING_GET_REQUEST(&queue->tx, idx), sizeof(txreq));
  1321 + RING_COPY_REQUEST(&queue->tx, idx, &txreq);
1326 1322  
1327 1323 /* Credit-based scheduling. */
1328 1324 if (txreq.size > queue->remaining_credit &&
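
The credit change above stops using a guest-supplied request size as the burst ceiling. A userspace sketch of the resulting top-up arithmetic follows; the queue fields and the clamping after the wrap check are modelled on the surrounding driver code but simplified, so treat the details as illustrative.

#include <limits.h>

struct demo_queue {
        unsigned long remaining_credit;
        unsigned long credit_bytes;     /* credit granted per replenish period */
};

static void demo_tx_add_credit(struct demo_queue *queue)
{
        unsigned long max_burst, max_credit;

        /* Allow a burst big enough for a 128 kB jumbo packet; the ceiling is
         * now a constant instead of a size read from the shared Tx ring. */
        max_burst = queue->credit_bytes > 131072UL ? queue->credit_bytes : 131072UL;

        /* Take care that adding a new chunk of credit doesn't wrap to zero. */
        max_credit = queue->remaining_credit + queue->credit_bytes;
        if (max_credit < queue->remaining_credit)
                max_credit = ULONG_MAX; /* wrapped: clamp (assumed, not shown in the hunk) */

        queue->remaining_credit = max_credit < max_burst ? max_credit : max_burst;
}
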
drivers/xen/events/events_fifo.c
... ... @@ -281,7 +281,8 @@
281 281  
282 282 static void consume_one_event(unsigned cpu,
283 283 struct evtchn_fifo_control_block *control_block,
284   - unsigned priority, unsigned long *ready)
  284 + unsigned priority, unsigned long *ready,
  285 + bool drop)
285 286 {
286 287 struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
287 288 uint32_t head;
... ... @@ -313,13 +314,17 @@
313 314 if (head == 0)
314 315 clear_bit(priority, ready);
315 316  
316   - if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
317   - handle_irq_for_port(port);
  317 + if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
  318 + if (unlikely(drop))
  319 + pr_warn("Dropping pending event for port %u\n", port);
  320 + else
  321 + handle_irq_for_port(port);
  322 + }
318 323  
319 324 q->head[priority] = head;
320 325 }
321 326  
322   -static void evtchn_fifo_handle_events(unsigned cpu)
  327 +static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
323 328 {
324 329 struct evtchn_fifo_control_block *control_block;
325 330 unsigned long ready;
... ... @@ -331,11 +336,16 @@
331 336  
332 337 while (ready) {
333 338 q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
334   - consume_one_event(cpu, control_block, q, &ready);
  339 + consume_one_event(cpu, control_block, q, &ready, drop);
335 340 ready |= xchg(&control_block->ready, 0);
336 341 }
337 342 }
338 343  
  344 +static void evtchn_fifo_handle_events(unsigned cpu)
  345 +{
  346 + __evtchn_fifo_handle_events(cpu, false);
  347 +}
  348 +
339 349 static void evtchn_fifo_resume(void)
340 350 {
341 351 unsigned cpu;
... ... @@ -419,6 +429,9 @@
419 429 case CPU_UP_PREPARE:
420 430 if (!per_cpu(cpu_control_block, cpu))
421 431 ret = evtchn_fifo_alloc_control_block(cpu);
  432 + break;
  433 + case CPU_DEAD:
  434 + __evtchn_fifo_handle_events(cpu, true);
422 435 break;
423 436 default:
424 437 break;
drivers/xen/xen-pciback/pciback.h
... ... @@ -37,6 +37,7 @@
37 37 struct xen_pci_sharedinfo *sh_info;
38 38 unsigned long flags;
39 39 struct work_struct op_work;
  40 + struct xen_pci_op op;
40 41 };
41 42  
42 43 struct xen_pcibk_dev_data {
drivers/xen/xen-pciback/pciback_ops.c
... ... @@ -70,6 +70,13 @@
70 70 enable ? "enable" : "disable");
71 71  
72 72 if (enable) {
  73 + /*
  74 + * The MSI or MSI-X should not have an IRQ handler. Otherwise
  75 + * if the guest terminates we BUG_ON in free_msi_irqs.
  76 + */
  77 + if (dev->msi_enabled || dev->msix_enabled)
  78 + goto out;
  79 +
73 80 rc = request_irq(dev_data->irq,
74 81 xen_pcibk_guest_interrupt, IRQF_SHARED,
75 82 dev_data->irq_name, dev);
... ... @@ -144,7 +151,12 @@
144 151 if (unlikely(verbose_request))
145 152 printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
146 153  
147   - status = pci_enable_msi(dev);
  154 + if (dev->msi_enabled)
  155 + status = -EALREADY;
  156 + else if (dev->msix_enabled)
  157 + status = -ENXIO;
  158 + else
  159 + status = pci_enable_msi(dev);
148 160  
149 161 if (status) {
150 162 pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
... ... @@ -173,20 +185,23 @@
173 185 int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
174 186 struct pci_dev *dev, struct xen_pci_op *op)
175 187 {
176   - struct xen_pcibk_dev_data *dev_data;
177   -
178 188 if (unlikely(verbose_request))
179 189 printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
180 190 pci_name(dev));
181   - pci_disable_msi(dev);
182 191  
  192 + if (dev->msi_enabled) {
  193 + struct xen_pcibk_dev_data *dev_data;
  194 +
  195 + pci_disable_msi(dev);
  196 +
  197 + dev_data = pci_get_drvdata(dev);
  198 + if (dev_data)
  199 + dev_data->ack_intr = 1;
  200 + }
183 201 op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
184 202 if (unlikely(verbose_request))
185 203 printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
186 204 op->value);
187   - dev_data = pci_get_drvdata(dev);
188   - if (dev_data)
189   - dev_data->ack_intr = 1;
190 205 return 0;
191 206 }
192 207  
... ... @@ -197,13 +212,26 @@
197 212 struct xen_pcibk_dev_data *dev_data;
198 213 int i, result;
199 214 struct msix_entry *entries;
  215 + u16 cmd;
200 216  
201 217 if (unlikely(verbose_request))
202 218 printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
203 219 pci_name(dev));
  220 +
204 221 if (op->value > SH_INFO_MAX_VEC)
205 222 return -EINVAL;
206 223  
  224 + if (dev->msix_enabled)
  225 + return -EALREADY;
  226 +
  227 + /*
  228 + * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
  229 + * to access the BARs where the MSI-X entries reside.
  230 + */
  231 + pci_read_config_word(dev, PCI_COMMAND, &cmd);
  232 + if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
  233 + return -ENXIO;
  234 +
207 235 entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
208 236 if (entries == NULL)
209 237 return -ENOMEM;
210 238  
... ... @@ -245,23 +273,27 @@
245 273 int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
246 274 struct pci_dev *dev, struct xen_pci_op *op)
247 275 {
248   - struct xen_pcibk_dev_data *dev_data;
249 276 if (unlikely(verbose_request))
250 277 printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
251 278 pci_name(dev));
252   - pci_disable_msix(dev);
253 279  
  280 + if (dev->msix_enabled) {
  281 + struct xen_pcibk_dev_data *dev_data;
  282 +
  283 + pci_disable_msix(dev);
  284 +
  285 + dev_data = pci_get_drvdata(dev);
  286 + if (dev_data)
  287 + dev_data->ack_intr = 1;
  288 + }
254 289 /*
255 290 * SR-IOV devices (which don't have any legacy IRQ) have
256 291 * an undefined IRQ value of zero.
257 292 */
258 293 op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
259 294 if (unlikely(verbose_request))
260   - printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
261   - op->value);
262   - dev_data = pci_get_drvdata(dev);
263   - if (dev_data)
264   - dev_data->ack_intr = 1;
  295 + printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n",
  296 + pci_name(dev), op->value);
265 297 return 0;
266 298 }
267 299 #endif
268 300  
... ... @@ -298,9 +330,11 @@
298 330 container_of(data, struct xen_pcibk_device, op_work);
299 331 struct pci_dev *dev;
300 332 struct xen_pcibk_dev_data *dev_data = NULL;
301   - struct xen_pci_op *op = &pdev->sh_info->op;
  333 + struct xen_pci_op *op = &pdev->op;
302 334 int test_intx = 0;
303 335  
  336 + *op = pdev->sh_info->op;
  337 + barrier();
304 338 dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
305 339  
306 340 if (dev == NULL)
... ... @@ -342,6 +376,17 @@
342 376 if ((dev_data->enable_intx != test_intx))
343 377 xen_pcibk_control_isr(dev, 0 /* no reset */);
344 378 }
  379 + pdev->sh_info->op.err = op->err;
  380 + pdev->sh_info->op.value = op->value;
  381 +#ifdef CONFIG_PCI_MSI
  382 + if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
  383 + unsigned int i;
  384 +
  385 + for (i = 0; i < op->value; i++)
  386 + pdev->sh_info->op.msix_entries[i].vector =
  387 + op->msix_entries[i].vector;
  388 + }
  389 +#endif
345 390 /* Tell the driver domain that we're done. */
346 391 wmb();
347 392 clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
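
The net effect of the pciback_ops.c changes is that the op handler now works on a private snapshot of the shared struct xen_pci_op and copies back only the result fields. A userspace model of that shape is sketched below; the types, the barrier, and the error value are illustrative, not the driver's.

struct demo_pci_op {
        unsigned int cmd;
        int err;
        unsigned int value;
};

struct demo_pdev {
        volatile struct demo_pci_op *sh_op;     /* lives in the shared page */
        struct demo_pci_op op;                  /* backend-private snapshot */
};

static void demo_do_op(struct demo_pdev *pdev)
{
        struct demo_pci_op *op = &pdev->op;

        *op = *pdev->sh_op;                     /* one fetch of the request */
        __asm__ __volatile__("" ::: "memory");  /* compiler barrier, like barrier() */

        switch (op->cmd) {                      /* dispatch on the snapshot only */
        /* ... handlers read and write *op, never *pdev->sh_op ... */
        default:
                op->err = -1;                   /* illustrative error code */
                break;
        }

        /* Publish only the fields the frontend is supposed to see. */
        pdev->sh_op->err = op->err;
        pdev->sh_op->value = op->value;
}
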
drivers/xen/xen-pciback/xenbus.c
... ... @@ -44,7 +44,6 @@
44 44 dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
45 45  
46 46 pdev->xdev = xdev;
47   - dev_set_drvdata(&xdev->dev, pdev);
48 47  
49 48 mutex_init(&pdev->dev_lock);
50 49  
... ... @@ -58,6 +57,9 @@
58 57 kfree(pdev);
59 58 pdev = NULL;
60 59 }
  60 +
  61 + dev_set_drvdata(&xdev->dev, pdev);
  62 +
61 63 out:
62 64 return pdev;
63 65 }
drivers/xen/xen-scsiback.c
... ... @@ -726,7 +726,7 @@
726 726 if (!pending_req)
727 727 return 1;
728 728  
729   - ring_req = *RING_GET_REQUEST(ring, rc);
  729 + RING_COPY_REQUEST(ring, rc, &ring_req);
730 730 ring->req_cons = ++rc;
731 731  
732 732 err = prepare_pending_reqs(info, &ring_req, pending_req);
include/xen/interface/io/ring.h
... ... @@ -181,6 +181,20 @@
181 181 #define RING_GET_REQUEST(_r, _idx) \
182 182 (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
183 183  
  184 +/*
  185 + * Get a local copy of a request.
  186 + *
  187 + * Use this in preference to RING_GET_REQUEST() so all processing is
  188 + * done on a local copy that cannot be modified by the other end.
  189 + *
  190 + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
  191 + * to be ineffective where _req is a struct which consists of only bitfields.
  192 + */
  193 +#define RING_COPY_REQUEST(_r, _idx, _req) do { \
  194 + /* Use volatile to force the copy into _req. */ \
  195 + *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \
  196 +} while (0)
  197 +
184 198 #define RING_GET_RESPONSE(_r, _idx) \
185 199 (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
186 200
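
RING_COPY_REQUEST() is meant to replace both RING_GET_REQUEST() dereferences and open-coded memcpy() of ring slots, as the netback and scsiback hunks above do. A self-contained toy version of the consumer side is sketched below; the four-slot ring and request layout are placeholders, not a real Xen protocol.

#include <stdint.h>
#include <stdio.h>

struct demo_request {
        uint16_t id;
        uint32_t gref;
};

#define DEMO_RING_SIZE 4                /* must be a power of two */

struct demo_ring {
        volatile struct demo_request ring[DEMO_RING_SIZE];  /* shared with the frontend */
        unsigned int req_cons;                              /* backend-private index */
};

/* Same idea as RING_COPY_REQUEST(): the volatile access forces one copy
 * into the caller's local request, which is all later code looks at. */
#define DEMO_COPY_REQUEST(_r, _idx, _req) \
        (*(_req) = (_r)->ring[(_idx) & (DEMO_RING_SIZE - 1)])

int main(void)
{
        struct demo_ring r = { .ring = { { .id = 7, .gref = 42 } } };
        struct demo_request req;

        DEMO_COPY_REQUEST(&r, r.req_cons++, &req);

        /* Every later use goes through the private copy. */
        printf("id=%u gref=%u\n", (unsigned)req.id, (unsigned)req.gref);
        return 0;
}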