Commit 5f94c1741bdc7a336553122036e8a779e616ccbf

Authored by Laurent Vivier
Committed by Avi Kivity
1 parent 92760499d0

KVM: Add coalesced MMIO support (common part)

This patch adds all the structures needed to coalesce MMIOs.
Until an architecture uses it, this code is not compiled.

Coalesced MMIO introduces two ioctl()s to define the MMIO zones where writes
can be coalesced:

- KVM_REGISTER_COALESCED_MMIO registers a coalesced MMIO zone.
  It takes one parameter (struct kvm_coalesced_mmio_zone) which defines
  a memory area where MMIO writes can be coalesced until the next switch to
  user space. The maximum number of MMIO zones is KVM_COALESCED_MMIO_ZONE_MAX.

- KVM_UNREGISTER_COALESCED_MMIO cancels all registered zones that fall inside
  the given bounds (the bounds are also given by a struct
  kvm_coalesced_mmio_zone); a usage sketch follows below.
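
A minimal userspace sketch of the two calls (not part of this patch; vm_fd is
assumed to be an already-created VM file descriptor, and the zone address and
size are illustrative values for a hypothetical emulated device):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Ask KVM to coalesce guest writes to one emulated MMIO window. */
static int register_zone(int vm_fd)
{
	struct kvm_coalesced_mmio_zone zone = {
		.addr = 0xe0000000,	/* guest physical address (example) */
		.size = 0x1000,		/* zone length in bytes (example) */
	};

	/* Returns -1 with errno ENOBUFS once KVM_COALESCED_MMIO_ZONE_MAX
	 * zones are already registered. */
	return ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
}

/* Drop every registered zone that lies inside the same bounds. */
static int unregister_zone(int vm_fd)
{
	struct kvm_coalesced_mmio_zone zone = {
		.addr = 0xe0000000,
		.size = 0x1000,
	};

	return ioctl(vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
}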

Userspace can check whether the kernel supports coalesced MMIO by querying
ioctl(KVM_CHECK_EXTENSION) for the KVM_CAP_COALESCED_MMIO capability.
The call returns 0 if the capability is not supported, or the page offset
at which the ring buffer will be stored.
The page offset depends on the architecture.
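
For illustration (not part of the patch), assuming kvm_fd is the file
descriptor obtained from opening /dev/kvm:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Returns the ring page offset, or 0 when coalesced MMIO is unsupported. */
static long coalesced_mmio_page_offset(int kvm_fd)
{
	long off = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);

	return off > 0 ? off : 0;
}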

After an ioctl(KVM_RUN), the first page of the memory mapped from the vcpu fd
points to the kvm_run structure. The offset given by KVM_CAP_COALESCED_MMIO is
the offset of the coalesced MMIO ring, expressed in PAGE_SIZE units, relative
to the start of the kvm_run structure. The MMIO ring buffer is defined by
struct kvm_coalesced_mmio_ring.
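
A sketch of how userspace could drain the ring, e.g. after returning from
KVM_RUN (not part of this patch): run is the mmap()ed kvm_run structure,
page_offset is the value returned for KVM_CAP_COALESCED_MMIO, and
handle_mmio_write() is a hypothetical helper that replays one write into the
device model.

#include <linux/kvm.h>
#include <unistd.h>

/* Hypothetical device-model hook. */
void handle_mmio_write(__u64 addr, const void *data, __u32 len);

static void flush_coalesced_mmio(struct kvm_run *run, long page_offset)
{
	long page_size = sysconf(_SC_PAGESIZE);
	unsigned int max = (page_size - sizeof(struct kvm_coalesced_mmio_ring)) /
			   sizeof(struct kvm_coalesced_mmio);
	struct kvm_coalesced_mmio_ring *ring;

	if (!page_offset)
		return;		/* capability not available */

	ring = (struct kvm_coalesced_mmio_ring *)
			((char *)run + page_offset * page_size);

	/* 'first' is the oldest pending entry, 'last' the first free slot. */
	while (ring->first != ring->last) {
		struct kvm_coalesced_mmio *ent = &ring->coalesced_mmio[ring->first];

		handle_mmio_write(ent->phys_addr, ent->data, ent->len);

		__sync_synchronize();	/* consume the entry before freeing the slot */
		ring->first = (ring->first + 1) % max;
	}
}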

[akio: fix oops during guest shutdown]

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>

5 changed files with 269 additions and 0 deletions

include/linux/kvm.h
... ... @@ -173,6 +173,30 @@
173 173 };
174 174 };
175 175  
  176 +/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
  177 +
  178 +struct kvm_coalesced_mmio_zone {
  179 + __u64 addr;
  180 + __u32 size;
  181 + __u32 pad;
  182 +};
  183 +
  184 +struct kvm_coalesced_mmio {
  185 + __u64 phys_addr;
  186 + __u32 len;
  187 + __u32 pad;
  188 + __u8 data[8];
  189 +};
  190 +
  191 +struct kvm_coalesced_mmio_ring {
  192 + __u32 first, last;
  193 + struct kvm_coalesced_mmio coalesced_mmio[0];
  194 +};
  195 +
  196 +#define KVM_COALESCED_MMIO_MAX \
  197 + ((PAGE_SIZE - sizeof(struct kvm_coalesced_mmio_ring)) / \
  198 + sizeof(struct kvm_coalesced_mmio))
  199 +
176 200 /* for KVM_TRANSLATE */
177 201 struct kvm_translation {
178 202 /* in */
... ... @@ -346,6 +370,7 @@
346 370 #define KVM_CAP_NOP_IO_DELAY 12
347 371 #define KVM_CAP_PV_MMU 13
348 372 #define KVM_CAP_MP_STATE 14
  373 +#define KVM_CAP_COALESCED_MMIO 15
349 374  
350 375 /*
351 376 * ioctls for VM fds
... ... @@ -371,6 +396,10 @@
371 396 #define KVM_CREATE_PIT _IO(KVMIO, 0x64)
372 397 #define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
373 398 #define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
  399 +#define KVM_REGISTER_COALESCED_MMIO \
  400 + _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
  401 +#define KVM_UNREGISTER_COALESCED_MMIO \
  402 + _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
374 403  
375 404 /*
376 405 * ioctls for vcpu fds
include/linux/kvm_host.h
... ... @@ -117,6 +117,10 @@
117 117 struct kvm_vm_stat stat;
118 118 struct kvm_arch arch;
119 119 atomic_t users_count;
  120 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  121 + struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
  122 + struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
  123 +#endif
120 124 };
121 125  
122 126 /* The guest did something we don't support. */
virt/kvm/coalesced_mmio.c
  1 +/*
  2 + * KVM coalesced MMIO
  3 + *
  4 + * Copyright (c) 2008 Bull S.A.S.
  5 + *
  6 + * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  7 + *
  8 + */
  9 +
  10 +#include "iodev.h"
  11 +
  12 +#include <linux/kvm_host.h>
  13 +#include <linux/kvm.h>
  14 +
  15 +#include "coalesced_mmio.h"
  16 +
  17 +static int coalesced_mmio_in_range(struct kvm_io_device *this,
  18 + gpa_t addr, int len, int is_write)
  19 +{
  20 + struct kvm_coalesced_mmio_dev *dev =
  21 + (struct kvm_coalesced_mmio_dev*)this->private;
  22 + struct kvm_coalesced_mmio_zone *zone;
  23 + int next;
  24 + int i;
  25 +
  26 + if (!is_write)
  27 + return 0;
  28 +
  29 + /* kvm->lock is taken by the caller and must be not released before
  30 + * dev.read/write
  31 + */
  32 +
  33 + /* Are we able to batch it ? */
  34 +
  35 + /* last is the first free entry
  36 + * check if we don't meet the first used entry
  37 + * there is always one unused entry in the buffer
  38 + */
  39 +
  40 + next = (dev->kvm->coalesced_mmio_ring->last + 1) %
  41 + KVM_COALESCED_MMIO_MAX;
  42 + if (next == dev->kvm->coalesced_mmio_ring->first) {
  43 + /* full */
  44 + return 0;
  45 + }
  46 +
  47 + /* is it in a batchable area ? */
  48 +
  49 + for (i = 0; i < dev->nb_zones; i++) {
  50 + zone = &dev->zone[i];
  51 +
  52 + /* (addr,len) is fully included in
  53 + * (zone->addr, zone->size)
  54 + */
  55 +
  56 + if (zone->addr <= addr &&
  57 + addr + len <= zone->addr + zone->size)
  58 + return 1;
  59 + }
  60 + return 0;
  61 +}
  62 +
  63 +static void coalesced_mmio_write(struct kvm_io_device *this,
  64 + gpa_t addr, int len, const void *val)
  65 +{
  66 + struct kvm_coalesced_mmio_dev *dev =
  67 + (struct kvm_coalesced_mmio_dev*)this->private;
  68 + struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
  69 +
  70 + /* kvm->lock must be taken by caller before call to in_range()*/
  71 +
  72 + /* copy data in first free entry of the ring */
  73 +
  74 + ring->coalesced_mmio[ring->last].phys_addr = addr;
  75 + ring->coalesced_mmio[ring->last].len = len;
  76 + memcpy(ring->coalesced_mmio[ring->last].data, val, len);
  77 + smp_wmb();
  78 + ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
  79 +}
  80 +
  81 +static void coalesced_mmio_destructor(struct kvm_io_device *this)
  82 +{
  83 + kfree(this);
  84 +}
  85 +
  86 +int kvm_coalesced_mmio_init(struct kvm *kvm)
  87 +{
  88 + struct kvm_coalesced_mmio_dev *dev;
  89 +
  90 + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
  91 + if (!dev)
  92 + return -ENOMEM;
  93 + dev->dev.write = coalesced_mmio_write;
  94 + dev->dev.in_range = coalesced_mmio_in_range;
  95 + dev->dev.destructor = coalesced_mmio_destructor;
  96 + dev->dev.private = dev;
  97 + dev->kvm = kvm;
  98 + kvm->coalesced_mmio_dev = dev;
  99 + kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
  100 +
  101 + return 0;
  102 +}
  103 +
  104 +int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
  105 + struct kvm_coalesced_mmio_zone *zone)
  106 +{
  107 + struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
  108 +
  109 + if (dev == NULL)
  110 + return -EINVAL;
  111 +
  112 + mutex_lock(&kvm->lock);
  113 + if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
  114 + mutex_unlock(&kvm->lock);
  115 + return -ENOBUFS;
  116 + }
  117 +
  118 + dev->zone[dev->nb_zones] = *zone;
  119 + dev->nb_zones++;
  120 +
  121 + mutex_unlock(&kvm->lock);
  122 + return 0;
  123 +}
  124 +
  125 +int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
  126 + struct kvm_coalesced_mmio_zone *zone)
  127 +{
  128 + int i;
  129 + struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
  130 + struct kvm_coalesced_mmio_zone *z;
  131 +
  132 + if (dev == NULL)
  133 + return -EINVAL;
  134 +
  135 + mutex_lock(&kvm->lock);
  136 +
  137 + i = dev->nb_zones;
  138 + while(i) {
  139 + z = &dev->zone[i - 1];
  140 +
  141 + /* unregister all zones
  142 + * included in (zone->addr, zone->size)
  143 + */
  144 +
  145 + if (zone->addr <= z->addr &&
  146 + z->addr + z->size <= zone->addr + zone->size) {
  147 + dev->nb_zones--;
  148 + *z = dev->zone[dev->nb_zones];
  149 + }
  150 + i--;
  151 + }
  152 +
  153 + mutex_unlock(&kvm->lock);
  154 +
  155 + return 0;
  156 +}
virt/kvm/coalesced_mmio.h
  1 +/*
  2 + * KVM coalesced MMIO
  3 + *
  4 + * Copyright (c) 2008 Bull S.A.S.
  5 + *
  6 + * Author: Laurent Vivier <Laurent.Vivier@bull.net>
  7 + *
  8 + */
  9 +
  10 +#define KVM_COALESCED_MMIO_ZONE_MAX 100
  11 +
  12 +struct kvm_coalesced_mmio_dev {
  13 + struct kvm_io_device dev;
  14 + struct kvm *kvm;
  15 + int nb_zones;
  16 + struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
  17 +};
  18 +
  19 +int kvm_coalesced_mmio_init(struct kvm *kvm);
  20 +int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
  21 + struct kvm_coalesced_mmio_zone *zone);
  22 +int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
  23 + struct kvm_coalesced_mmio_zone *zone);
virt/kvm/kvm_main.c
... ... @@ -47,6 +47,10 @@
47 47 #include <asm/uaccess.h>
48 48 #include <asm/pgtable.h>
49 49  
  50 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  51 +#include "coalesced_mmio.h"
  52 +#endif
  53 +
50 54 MODULE_AUTHOR("Qumranet");
51 55 MODULE_LICENSE("GPL");
52 56  
53 57  
... ... @@ -185,10 +189,23 @@
185 189 static struct kvm *kvm_create_vm(void)
186 190 {
187 191 struct kvm *kvm = kvm_arch_create_vm();
  192 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  193 + struct page *page;
  194 +#endif
188 195  
189 196 if (IS_ERR(kvm))
190 197 goto out;
191 198  
  199 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  200 + page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  201 + if (!page) {
  202 + kfree(kvm);
  203 + return ERR_PTR(-ENOMEM);
  204 + }
  205 + kvm->coalesced_mmio_ring =
  206 + (struct kvm_coalesced_mmio_ring *)page_address(page);
  207 +#endif
  208 +
192 209 kvm->mm = current->mm;
193 210 atomic_inc(&kvm->mm->mm_count);
194 211 spin_lock_init(&kvm->mmu_lock);
... ... @@ -200,6 +217,9 @@
200 217 spin_lock(&kvm_lock);
201 218 list_add(&kvm->vm_list, &vm_list);
202 219 spin_unlock(&kvm_lock);
  220 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  221 + kvm_coalesced_mmio_init(kvm);
  222 +#endif
203 223 out:
204 224 return kvm;
205 225 }
... ... @@ -242,6 +262,10 @@
242 262 spin_unlock(&kvm_lock);
243 263 kvm_io_bus_destroy(&kvm->pio_bus);
244 264 kvm_io_bus_destroy(&kvm->mmio_bus);
  265 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  266 + if (kvm->coalesced_mmio_ring != NULL)
  267 + free_page((unsigned long)kvm->coalesced_mmio_ring);
  268 +#endif
245 269 kvm_arch_destroy_vm(kvm);
246 270 mmdrop(mm);
247 271 }
... ... @@ -826,6 +850,10 @@
826 850 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
827 851 page = virt_to_page(vcpu->arch.pio_data);
828 852 #endif
  853 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  854 + else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
  855 + page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
  856 +#endif
829 857 else
830 858 return VM_FAULT_SIGBUS;
831 859 get_page(page);
... ... @@ -1148,6 +1176,32 @@
1148 1176 goto out;
1149 1177 break;
1150 1178 }
  1179 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  1180 + case KVM_REGISTER_COALESCED_MMIO: {
  1181 + struct kvm_coalesced_mmio_zone zone;
  1182 + r = -EFAULT;
  1183 + if (copy_from_user(&zone, argp, sizeof zone))
  1184 + goto out;
  1185 + r = -ENXIO;
  1186 + r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
  1187 + if (r)
  1188 + goto out;
  1189 + r = 0;
  1190 + break;
  1191 + }
  1192 + case KVM_UNREGISTER_COALESCED_MMIO: {
  1193 + struct kvm_coalesced_mmio_zone zone;
  1194 + r = -EFAULT;
  1195 + if (copy_from_user(&zone, argp, sizeof zone))
  1196 + goto out;
  1197 + r = -ENXIO;
  1198 + r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
  1199 + if (r)
  1200 + goto out;
  1201 + r = 0;
  1202 + break;
  1203 + }
  1204 +#endif
1151 1205 default:
1152 1206 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1153 1207 }
... ... @@ -1231,6 +1285,9 @@
1231 1285 r = PAGE_SIZE; /* struct kvm_run */
1232 1286 #ifdef CONFIG_X86
1233 1287 r += PAGE_SIZE; /* pio data page */
  1288 +#endif
  1289 +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
  1290 + r += PAGE_SIZE; /* coalesced mmio ring page */
1234 1291 #endif
1235 1292 break;
1236 1293 case KVM_TRACE_ENABLE: