Commit 2f333bcb4edd8daef99dabe4e7df8277af73cff1

Authored by Marcelo Tosatti
Committed by Avi Kivity
1 parent 9f81128591

KVM: MMU: hypercall based pte updates and TLB flushes

Hypercall based pte updates are faster than faults, and also allow use
of the lazy MMU mode to batch operations.

Don't report the feature if two dimensional paging is enabled.

[avi:
 - one mmu_op hypercall instead of one per op
 - allow 64-bit gpa on hypercall
 - don't pass host errors (-ENOMEM) to guest]

[akpm: warning fix on i386]

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>

Showing 6 changed files with 190 additions and 3 deletions (Side-by-side Diff view)

arch/x86/kvm/mmu.c
... ... @@ -28,6 +28,7 @@
28 28 #include <linux/module.h>
29 29 #include <linux/swap.h>
30 30 #include <linux/hugetlb.h>
  31 +#include <linux/compiler.h>
31 32  
32 33 #include <asm/page.h>
33 34 #include <asm/cmpxchg.h>
... ... @@ -40,7 +41,7 @@
40 41 * 2. while doing 1. it walks guest-physical to host-physical
41 42 * If the hardware supports that we don't need to do shadow paging.
42 43 */
43   -static bool tdp_enabled = false;
  44 +bool tdp_enabled = false;
44 45  
45 46 #undef MMU_DEBUG
46 47  
... ... @@ -167,6 +168,13 @@
167 168 #define ACC_USER_MASK PT_USER_MASK
168 169 #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
169 170  
  171 +struct kvm_pv_mmu_op_buffer {
  172 + void *ptr;
  173 + unsigned len;
  174 + unsigned processed;
  175 + char buf[512] __aligned(sizeof(long));
  176 +};
  177 +
170 178 struct kvm_rmap_desc {
171 179 u64 *shadow_ptes[RMAP_EXT];
172 180 struct kvm_rmap_desc *more;
... ... @@ -2001,6 +2009,132 @@
2001 2009 (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
2002 2010  
2003 2011 return nr_mmu_pages;
  2012 +}
  2013 +
  2014 +static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
  2015 + unsigned len)
  2016 +{
  2017 + if (len > buffer->len)
  2018 + return NULL;
  2019 + return buffer->ptr;
  2020 +}
  2021 +
  2022 +static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
  2023 + unsigned len)
  2024 +{
  2025 + void *ret;
  2026 +
  2027 + ret = pv_mmu_peek_buffer(buffer, len);
  2028 + if (!ret)
  2029 + return ret;
  2030 + buffer->ptr += len;
  2031 + buffer->len -= len;
  2032 + buffer->processed += len;
  2033 + return ret;
  2034 +}
  2035 +
  2036 +static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
  2037 + gpa_t addr, gpa_t value)
  2038 +{
  2039 + int bytes = 8;
  2040 + int r;
  2041 +
  2042 + if (!is_long_mode(vcpu) && !is_pae(vcpu))
  2043 + bytes = 4;
  2044 +
  2045 + r = mmu_topup_memory_caches(vcpu);
  2046 + if (r)
  2047 + return r;
  2048 +
  2049 + if (!__emulator_write_phys(vcpu, addr, &value, bytes))
  2050 + return -EFAULT;
  2051 +
  2052 + return 1;
  2053 +}
  2054 +
  2055 +static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
  2056 +{
  2057 + kvm_x86_ops->tlb_flush(vcpu);
  2058 + return 1;
  2059 +}
  2060 +
  2061 +static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
  2062 +{
  2063 + spin_lock(&vcpu->kvm->mmu_lock);
  2064 + mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
  2065 + spin_unlock(&vcpu->kvm->mmu_lock);
  2066 + return 1;
  2067 +}
  2068 +
  2069 +static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
  2070 + struct kvm_pv_mmu_op_buffer *buffer)
  2071 +{
  2072 + struct kvm_mmu_op_header *header;
  2073 +
  2074 + header = pv_mmu_peek_buffer(buffer, sizeof *header);
  2075 + if (!header)
  2076 + return 0;
  2077 + switch (header->op) {
  2078 + case KVM_MMU_OP_WRITE_PTE: {
  2079 + struct kvm_mmu_op_write_pte *wpte;
  2080 +
  2081 + wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
  2082 + if (!wpte)
  2083 + return 0;
  2084 + return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
  2085 + wpte->pte_val);
  2086 + }
  2087 + case KVM_MMU_OP_FLUSH_TLB: {
  2088 + struct kvm_mmu_op_flush_tlb *ftlb;
  2089 +
  2090 + ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
  2091 + if (!ftlb)
  2092 + return 0;
  2093 + return kvm_pv_mmu_flush_tlb(vcpu);
  2094 + }
  2095 + case KVM_MMU_OP_RELEASE_PT: {
  2096 + struct kvm_mmu_op_release_pt *rpt;
  2097 +
  2098 + rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
  2099 + if (!rpt)
  2100 + return 0;
  2101 + return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
  2102 + }
  2103 + default: return 0;
  2104 + }
  2105 +}
  2106 +
  2107 +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
  2108 + gpa_t addr, unsigned long *ret)
  2109 +{
  2110 + int r;
  2111 + struct kvm_pv_mmu_op_buffer buffer;
  2112 +
  2113 + down_read(&vcpu->kvm->slots_lock);
  2114 + down_read(&current->mm->mmap_sem);
  2115 +
  2116 + buffer.ptr = buffer.buf;
  2117 + buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
  2118 + buffer.processed = 0;
  2119 +
  2120 + r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
  2121 + if (r)
  2122 + goto out;
  2123 +
  2124 + while (buffer.len) {
  2125 + r = kvm_pv_mmu_op_one(vcpu, &buffer);
  2126 + if (r < 0)
  2127 + goto out;
  2128 + if (r == 0)
  2129 + break;
  2130 + }
  2131 +
  2132 + r = 1;
  2133 +out:
  2134 + *ret = buffer.processed;
  2135 + up_read(&current->mm->mmap_sem);
  2136 + up_read(&vcpu->kvm->slots_lock);
  2137 + return r;
2004 2138 }
2005 2139  
2006 2140 #ifdef AUDIT
arch/x86/kvm/x86.c
... ... @@ -832,6 +832,9 @@
832 832 case KVM_CAP_NR_MEMSLOTS:
833 833 r = KVM_MEMORY_SLOTS;
834 834 break;
  835 + case KVM_CAP_PV_MMU:
  836 + r = !tdp_enabled;
  837 + break;
835 838 default:
836 839 r = 0;
837 840 break;
838 841  
... ... @@ -2452,9 +2455,19 @@
2452 2455 }
2453 2456 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
2454 2457  
  2458 +static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
  2459 + unsigned long a1)
  2460 +{
  2461 + if (is_long_mode(vcpu))
  2462 + return a0;
  2463 + else
  2464 + return a0 | ((gpa_t)a1 << 32);
  2465 +}
  2466 +
2455 2467 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
2456 2468 {
2457 2469 unsigned long nr, a0, a1, a2, a3, ret;
  2470 + int r = 1;
2458 2471  
2459 2472 kvm_x86_ops->cache_regs(vcpu);
2460 2473  
... ... @@ -2476,6 +2489,9 @@
2476 2489 case KVM_HC_VAPIC_POLL_IRQ:
2477 2490 ret = 0;
2478 2491 break;
  2492 + case KVM_HC_MMU_OP:
  2493 + r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
  2494 + break;
2479 2495 default:
2480 2496 ret = -KVM_ENOSYS;
2481 2497 break;
... ... @@ -2483,7 +2499,7 @@
2483 2499 vcpu->arch.regs[VCPU_REGS_RAX] = ret;
2484 2500 kvm_x86_ops->decache_regs(vcpu);
2485 2501 ++vcpu->stat.hypercalls;
2486   - return 0;
  2502 + return r;
2487 2503 }
2488 2504 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
2489 2505  
include/asm-x86/kvm_host.h
... ... @@ -434,6 +434,10 @@
434 434  
435 435 int __emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
436 436 const void *val, int bytes);
  437 +int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
  438 + gpa_t addr, unsigned long *ret);
  439 +
  440 +extern bool tdp_enabled;
437 441  
438 442 enum emulation_result {
439 443 EMULATE_DONE, /* no further processing */
include/asm-x86/kvm_para.h
... ... @@ -12,9 +12,38 @@
12 12 #define KVM_CPUID_FEATURES 0x40000001
13 13 #define KVM_FEATURE_CLOCKSOURCE 0
14 14 #define KVM_FEATURE_NOP_IO_DELAY 1
  15 +#define KVM_FEATURE_MMU_OP 2
15 16  
16 17 #define MSR_KVM_WALL_CLOCK 0x11
17 18 #define MSR_KVM_SYSTEM_TIME 0x12
  19 +
  20 +#define KVM_MAX_MMU_OP_BATCH 32
  21 +
  22 +/* Operations for KVM_HC_MMU_OP */
  23 +#define KVM_MMU_OP_WRITE_PTE 1
  24 +#define KVM_MMU_OP_FLUSH_TLB 2
  25 +#define KVM_MMU_OP_RELEASE_PT 3
  26 +
  27 +/* Payload for KVM_HC_MMU_OP */
  28 +struct kvm_mmu_op_header {
  29 + __u32 op;
  30 + __u32 pad;
  31 +};
  32 +
  33 +struct kvm_mmu_op_write_pte {
  34 + struct kvm_mmu_op_header header;
  35 + __u64 pte_phys;
  36 + __u64 pte_val;
  37 +};
  38 +
  39 +struct kvm_mmu_op_flush_tlb {
  40 + struct kvm_mmu_op_header header;
  41 +};
  42 +
  43 +struct kvm_mmu_op_release_pt {
  44 + struct kvm_mmu_op_header header;
  45 + __u64 pt_phys;
  46 +};
18 47  
19 48 #ifdef __KERNEL__
20 49 #include <asm/processor.h>
include/linux/kvm.h
... ... @@ -238,6 +238,7 @@
238 238 #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
239 239 #define KVM_CAP_PIT 11
240 240 #define KVM_CAP_NOP_IO_DELAY 12
  241 +#define KVM_CAP_PV_MMU 13
241 242  
242 243 /*
243 244 * ioctls for VM fds
include/linux/kvm_para.h
... ... @@ -11,8 +11,11 @@
11 11  
12 12 /* Return values for hypercalls */
13 13 #define KVM_ENOSYS 1000
  14 +#define KVM_EFAULT EFAULT
  15 +#define KVM_E2BIG E2BIG
14 16  
15   -#define KVM_HC_VAPIC_POLL_IRQ 1
  17 +#define KVM_HC_VAPIC_POLL_IRQ 1
  18 +#define KVM_HC_MMU_OP 2
16 19  
17 20 /*
18 21 * hypercalls use architecture specific