Commit edba23e51578f7cb6781461568489fc1825db4ac
Committed by: Avi Kivity
1 parent: fa7bff8f8a
Exists in: master and in 7 other branches
KVM: Return EFAULT from kvm ioctl when guest accesses bad area
Currently, if the guest accesses an address that belongs to a memory slot but is not backed by a page, or whose page is read-only, KVM treats it like an MMIO access. Remove that capability. It was never part of the interface and should not be relied upon. Signed-off-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Avi Kivity <avi@redhat.com>
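For userspace, the practical effect of this change is that the vcpu run ioctl can now fail with EFAULT when the guest touches such an address, instead of the access being surfaced as an MMIO exit. The following is a minimal, hypothetical VMM snippet (the run_vcpu_once() helper and its error message are illustrative, not part of this commit) showing how a caller might handle that error from KVM_RUN:

#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: run the vcpu once and report a bad guest access. */
static int run_vcpu_once(int vcpu_fd)
{
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                if (errno == EFAULT) {
                        /* The guest accessed memslot memory the host could not
                         * map (unbacked or read-only); no MMIO exit is given. */
                        fprintf(stderr, "KVM_RUN: guest accessed bad slot memory\n");
                        return -1;
                }
                perror("KVM_RUN");
                return -1;
        }
        return 0;       /* otherwise inspect kvm_run->exit_reason as usual */
}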
Showing 3 changed files with 28 additions and 5 deletions (inline diff).
arch/x86/kvm/mmu.c
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affilates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"

#include <linux/kvm_host.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/compiler.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/vmx.h>

/*
 * When setting this variable to true it enables Two-Dimensional-Paging
 * where the hardware walks 2 page tables:
 * 1. the guest-virtual to guest-physical
 * 2. while doing 1. it walks guest-physical to host-physical
 * If the hardware supports that we don't need to do shadow paging.
 */
bool tdp_enabled = false;

#undef MMU_DEBUG

#undef AUDIT

#ifdef AUDIT
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
#else
static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
#endif

#ifdef MMU_DEBUG

#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)

#else

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#endif

#if defined(MMU_DEBUG) || defined(AUDIT)
static int dbg = 0;
module_param(dbg, bool, 0644);
#endif

static int oos_shadow = 1;
module_param(oos_shadow, bool, 0644);

#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x) \
        if (!(x)) { \
                printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
                       __FILE__, __LINE__, #x); \
        }
#endif

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
                (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
                (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))


#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
                (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))
#define PT32_LVL_OFFSET_MASK(level) \
        (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                * PT32_LEVEL_BITS))) - 1))

#define PT32_INDEX(address, level)\
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))


#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))
#define PT64_LVL_ADDR_MASK(level) \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                * PT64_LEVEL_BITS))) - 1))
#define PT64_LVL_OFFSET_MASK(level) \
        (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                * PT64_LEVEL_BITS))) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
#define PT32_LVL_ADDR_MASK(level) \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
                                                * PT32_LEVEL_BITS))) - 1))

#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
                        | PT64_NX_MASK)

#define RMAP_EXT 4

#define ACC_EXEC_MASK    1
#define ACC_WRITE_MASK   PT_WRITABLE_MASK
#define ACC_USER_MASK    PT_USER_MASK
#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)

#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "mmutrace.h"

#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)

struct kvm_rmap_desc {
        u64 *sptes[RMAP_EXT];
        struct kvm_rmap_desc *more;
};

struct kvm_shadow_walk_iterator {
        u64 addr;
        hpa_t shadow_addr;
        int level;
        u64 *sptep;
        unsigned index;
};

#define for_each_shadow_entry(_vcpu, _addr, _walker)    \
        for (shadow_walk_init(&(_walker), _vcpu, _addr); \
             shadow_walk_okay(&(_walker)); \
             shadow_walk_next(&(_walker)))

typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte);

static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static struct kmem_cache *mmu_page_header_cache;

static u64 __read_mostly shadow_trap_nonpresent_pte;
static u64 __read_mostly shadow_notrap_nonpresent_pte;
static u64 __read_mostly shadow_base_present_pte;
static u64 __read_mostly shadow_nx_mask;
static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;

static inline u64 rsvd_bits(int s, int e)
{
        return ((1ULL << (e - s + 1)) - 1) << s;
}

void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{
        shadow_trap_nonpresent_pte = trap_pte;
        shadow_notrap_nonpresent_pte = notrap_pte;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);

void kvm_mmu_set_base_ptes(u64 base_pte)
{
        shadow_base_present_pte = base_pte;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);

void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
        shadow_user_mask = user_mask;
        shadow_accessed_mask = accessed_mask;
        shadow_dirty_mask = dirty_mask;
        shadow_nx_mask = nx_mask;
        shadow_x_mask = x_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);

static bool is_write_protection(struct kvm_vcpu *vcpu)
{
        return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}

static int is_cpuid_PSE36(void)
{
        return 1;
}

static int is_nx(struct kvm_vcpu *vcpu)
{
        return vcpu->arch.efer & EFER_NX;
}

static int is_shadow_present_pte(u64 pte)
{
        return pte != shadow_trap_nonpresent_pte
                && pte != shadow_notrap_nonpresent_pte;
}

static int is_large_pte(u64 pte)
{
        return pte & PT_PAGE_SIZE_MASK;
}

static int is_writable_pte(unsigned long pte)
{
        return pte & PT_WRITABLE_MASK;
}

static int is_dirty_gpte(unsigned long pte)
{
        return pte & PT_DIRTY_MASK;
}

static int is_rmap_spte(u64 pte)
{
        return is_shadow_present_pte(pte);
}

static int is_last_spte(u64 pte, int level)
{
        if (level == PT_PAGE_TABLE_LEVEL)
                return 1;
        if (is_large_pte(pte))
                return 1;
        return 0;
}

static pfn_t spte_to_pfn(u64 pte)
{
        return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

static gfn_t pse36_gfn_delta(u32 gpte)
{
        int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;

        return (gpte & PT32_DIR_PSE36_MASK) << shift;
}

static void __set_spte(u64 *sptep, u64 spte)
{
#ifdef CONFIG_X86_64
        set_64bit((unsigned long *)sptep, spte);
#else
        set_64bit((unsigned long long *)sptep, spte);
#endif
}

static u64 __xchg_spte(u64 *sptep, u64 new_spte)
{
#ifdef CONFIG_X86_64
        return xchg(sptep, new_spte);
#else
        u64 old_spte;

        do {
                old_spte = *sptep;
        } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte);

        return old_spte;
#endif
}

static void update_spte(u64 *sptep, u64 new_spte)
{
        u64 old_spte;

        if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask)) {
                __set_spte(sptep, new_spte);
        } else {
                old_spte = __xchg_spte(sptep, new_spte);
                if (old_spte & shadow_accessed_mask)
                        mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
        }
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
                                  struct kmem_cache *base_cache, int min)
{
        void *obj;

        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
                obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
                if (!obj)
                        return -ENOMEM;
                cache->objects[cache->nobjs++] = obj;
        }
        return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc,
                                  struct kmem_cache *cache)
{
        while (mc->nobjs)
                kmem_cache_free(cache, mc->objects[--mc->nobjs]);
}

static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
                                       int min)
{
        struct page *page;

        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
                page = alloc_page(GFP_KERNEL);
                if (!page)
                        return -ENOMEM;
                cache->objects[cache->nobjs++] = page_address(page);
        }
        return 0;
}

static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
{
        while (mc->nobjs)
                free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
        int r;

        r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_chain_cache,
                                   pte_chain_cache, 4);
        if (r)
                goto out;
        r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
                                   rmap_desc_cache, 4);
        if (r)
                goto out;
        r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
        if (r)
                goto out;
        r = mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache,
                                   mmu_page_header_cache, 4);
out:
        return r;
}

static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
        mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache);
        mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache);
        mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache);
        mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache,
                                mmu_page_header_cache);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
                                    size_t size)
{
        void *p;

        BUG_ON(!mc->nobjs);
        p = mc->objects[--mc->nobjs];
        return p;
}

static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
{
        return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_chain_cache,
                                      sizeof(struct kvm_pte_chain));
}

static void mmu_free_pte_chain(struct kvm_pte_chain *pc)
{
        kmem_cache_free(pte_chain_cache, pc);
}

static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
{
        return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache,
                                      sizeof(struct kvm_rmap_desc));
}

static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
{
        kmem_cache_free(rmap_desc_cache, rd);
}

static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
{
        if (!sp->role.direct)
                return sp->gfns[index];

        return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS));
}

static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
{
        if (sp->role.direct)
                BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
        else
                sp->gfns[index] = gfn;
}

/*
 * Return the pointer to the largepage write count for a given
 * gfn, handling slots that are not large page aligned.
 */
static int *slot_largepage_idx(gfn_t gfn,
                               struct kvm_memory_slot *slot,
                               int level)
{
        unsigned long idx;

        idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
              (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
        return &slot->lpage_info[level - 2][idx].write_count;
}

static void account_shadowed(struct kvm *kvm, gfn_t gfn)
{
        struct kvm_memory_slot *slot;
        int *write_count;
        int i;

        slot = gfn_to_memslot(kvm, gfn);
        for (i = PT_DIRECTORY_LEVEL;
             i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
                write_count = slot_largepage_idx(gfn, slot, i);
                *write_count += 1;
        }
}

static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
{
        struct kvm_memory_slot *slot;
        int *write_count;
        int i;

        slot = gfn_to_memslot(kvm, gfn);
        for (i = PT_DIRECTORY_LEVEL;
             i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
                write_count = slot_largepage_idx(gfn, slot, i);
                *write_count -= 1;
                WARN_ON(*write_count < 0);
        }
}

static int has_wrprotected_page(struct kvm *kvm,
                                gfn_t gfn,
                                int level)
{
        struct kvm_memory_slot *slot;
        int *largepage_idx;

        slot = gfn_to_memslot(kvm, gfn);
        if (slot) {
                largepage_idx = slot_largepage_idx(gfn, slot, level);
                return *largepage_idx;
        }

        return 1;
}

static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
{
        unsigned long page_size;
        int i, ret = 0;

        page_size = kvm_host_page_size(kvm, gfn);

        for (i = PT_PAGE_TABLE_LEVEL;
             i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
                if (page_size >= KVM_HPAGE_SIZE(i))
                        ret = i;
                else
                        break;
        }

        return ret;
}

static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
{
        struct kvm_memory_slot *slot;
        int host_level, level, max_level;

        slot = gfn_to_memslot(vcpu->kvm, large_gfn);
        if (slot && slot->dirty_bitmap)
                return PT_PAGE_TABLE_LEVEL;

        host_level = host_mapping_level(vcpu->kvm, large_gfn);

        if (host_level == PT_PAGE_TABLE_LEVEL)
                return host_level;

        max_level = kvm_x86_ops->get_lpage_level() < host_level ?
                kvm_x86_ops->get_lpage_level() : host_level;

        for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
                if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
                        break;

        return level - 1;
}

/*
 * Take gfn and return the reverse mapping to it.
 */

static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
{
        struct kvm_memory_slot *slot;
        unsigned long idx;

        slot = gfn_to_memslot(kvm, gfn);
        if (likely(level == PT_PAGE_TABLE_LEVEL))
                return &slot->rmap[gfn - slot->base_gfn];

        idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
                (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));

        return &slot->lpage_info[level - 2][idx].rmap_pde;
}

/*
 * Reverse mapping data structures:
 *
 * If rmapp bit zero is zero, then rmapp point to the shadw page table entry
 * that points to page_address(page).
 *
 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
 * containing more mappings.
 *
 * Returns the number of rmap entries before the spte was added or zero if
 * the spte was not added.
 *
 */
static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
        struct kvm_mmu_page *sp;
        struct kvm_rmap_desc *desc;
        unsigned long *rmapp;
        int i, count = 0;

        if (!is_rmap_spte(*spte))
                return count;
        sp = page_header(__pa(spte));
        kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn);
        rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
        if (!*rmapp) {
                rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
                *rmapp = (unsigned long)spte;
        } else if (!(*rmapp & 1)) {
                rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
                desc = mmu_alloc_rmap_desc(vcpu);
                desc->sptes[0] = (u64 *)*rmapp;
                desc->sptes[1] = spte;
                *rmapp = (unsigned long)desc | 1;
        } else {
                rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
                desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
                while (desc->sptes[RMAP_EXT-1] && desc->more) {
                        desc = desc->more;
                        count += RMAP_EXT;
                }
                if (desc->sptes[RMAP_EXT-1]) {
                        desc->more = mmu_alloc_rmap_desc(vcpu);
                        desc = desc->more;
                }
                for (i = 0; desc->sptes[i]; ++i)
                        ;
                desc->sptes[i] = spte;
        }
        return count;
}

static void rmap_desc_remove_entry(unsigned long *rmapp,
                                   struct kvm_rmap_desc *desc,
                                   int i,
                                   struct kvm_rmap_desc *prev_desc)
{
        int j;

        for (j = RMAP_EXT - 1; !desc->sptes[j] && j > i; --j)
                ;
        desc->sptes[i] = desc->sptes[j];
        desc->sptes[j] = NULL;
        if (j != 0)
                return;
        if (!prev_desc && !desc->more)
                *rmapp = (unsigned long)desc->sptes[0];
        else
                if (prev_desc)
                        prev_desc->more = desc->more;
                else
                        *rmapp = (unsigned long)desc->more | 1;
        mmu_free_rmap_desc(desc);
}

static void rmap_remove(struct kvm *kvm, u64 *spte)
{
        struct kvm_rmap_desc *desc;
        struct kvm_rmap_desc *prev_desc;
        struct kvm_mmu_page *sp;
        gfn_t gfn;
        unsigned long *rmapp;
        int i;

        sp = page_header(__pa(spte));
        gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
        rmapp = gfn_to_rmap(kvm, gfn, sp->role.level);
        if (!*rmapp) {
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
                BUG();
        } else if (!(*rmapp & 1)) {
                rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
                if ((u64 *)*rmapp != spte) {
                        printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
                               spte, *spte);
                        BUG();
                }
                *rmapp = 0;
        } else {
                rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
                desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
                prev_desc = NULL;
                while (desc) {
                        for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i)
                                if (desc->sptes[i] == spte) {
                                        rmap_desc_remove_entry(rmapp,
                                                               desc, i,
                                                               prev_desc);
                                        return;
                                }
                        prev_desc = desc;
                        desc = desc->more;
                }
                pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
                BUG();
        }
}

static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
{
        pfn_t pfn;
        u64 old_spte;

        old_spte = __xchg_spte(sptep, new_spte);
        if (!is_rmap_spte(old_spte))
                return;
        pfn = spte_to_pfn(old_spte);
        if (old_spte & shadow_accessed_mask)
                kvm_set_pfn_accessed(pfn);
        if (is_writable_pte(old_spte))
                kvm_set_pfn_dirty(pfn);
        rmap_remove(kvm, sptep);
}

static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
{
        struct kvm_rmap_desc *desc;
        u64 *prev_spte;
        int i;

        if (!*rmapp)
                return NULL;
        else if (!(*rmapp & 1)) {
                if (!spte)
                        return (u64 *)*rmapp;
                return NULL;
        }
        desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
        prev_spte = NULL;
        while (desc) {
                for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) {
                        if (prev_spte == spte)
                                return desc->sptes[i];
                        prev_spte = desc->sptes[i];
                }
                desc = desc->more;
        }
        return NULL;
}

static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
        unsigned long *rmapp;
        u64 *spte;
        int i, write_protected = 0;

        rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL);

        spte = rmap_next(kvm, rmapp, NULL);
        while (spte) {
                BUG_ON(!spte);
                BUG_ON(!(*spte & PT_PRESENT_MASK));
                rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
                if (is_writable_pte(*spte)) {
                        update_spte(spte, *spte & ~PT_WRITABLE_MASK);
                        write_protected = 1;
                }
                spte = rmap_next(kvm, rmapp, spte);
        }
        if (write_protected) {
                pfn_t pfn;

                spte = rmap_next(kvm, rmapp, NULL);
                pfn = spte_to_pfn(*spte);
                kvm_set_pfn_dirty(pfn);
        }

        /* check for huge page mappings */
        for (i = PT_DIRECTORY_LEVEL;
             i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
                rmapp = gfn_to_rmap(kvm, gfn, i);
                spte = rmap_next(kvm, rmapp, NULL);
                while (spte) {
                        BUG_ON(!spte);
                        BUG_ON(!(*spte & PT_PRESENT_MASK));
                        BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
                        pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
                        if (is_writable_pte(*spte)) {
                                drop_spte(kvm, spte,
                                          shadow_trap_nonpresent_pte);
                                --kvm->stat.lpages;
                                spte = NULL;
                                write_protected = 1;
                        }
                        spte = rmap_next(kvm, rmapp, spte);
                }
        }

        return write_protected;
}

static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                           unsigned long data)
{
        u64 *spte;
        int need_tlb_flush = 0;

        while ((spte = rmap_next(kvm, rmapp, NULL))) {
                BUG_ON(!(*spte & PT_PRESENT_MASK));
                rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
                drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
                need_tlb_flush = 1;
        }
        return need_tlb_flush;
}

static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
                             unsigned long data)
{
        int need_flush = 0;
        u64 *spte, new_spte, old_spte;
        pte_t *ptep = (pte_t *)data;
        pfn_t new_pfn;

        WARN_ON(pte_huge(*ptep));
        new_pfn = pte_pfn(*ptep);
        spte = rmap_next(kvm, rmapp, NULL);
        while (spte) {
                BUG_ON(!is_shadow_present_pte(*spte));
                rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
                need_flush = 1;
                if (pte_write(*ptep)) {
                        drop_spte(kvm, spte, shadow_trap_nonpresent_pte);
                        spte = rmap_next(kvm, rmapp, NULL);
                } else {
                        new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
                        new_spte |= (u64)new_pfn << PAGE_SHIFT;

                        new_spte &= ~PT_WRITABLE_MASK;
                        new_spte &= ~SPTE_HOST_WRITEABLE;
                        new_spte &= ~shadow_accessed_mask;
                        if (is_writable_pte(*spte))
                                kvm_set_pfn_dirty(spte_to_pfn(*spte));
                        old_spte = __xchg_spte(spte, new_spte);
                        if (is_shadow_present_pte(old_spte)
                            && (old_spte & shadow_accessed_mask))
                                mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
                        spte = rmap_next(kvm, rmapp, spte);
                }
        }
        if (need_flush)
                kvm_flush_remote_tlbs(kvm);

        return 0;
}

static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                          unsigned long data,
                          int (*handler)(struct kvm *kvm, unsigned long *rmapp,
                                         unsigned long data))
{
        int i, j;
        int ret;
        int retval = 0;
        struct kvm_memslots *slots;

        slots = kvm_memslots(kvm);

        for (i = 0; i < slots->nmemslots; i++) {
                struct kvm_memory_slot *memslot = &slots->memslots[i];
                unsigned long start = memslot->userspace_addr;
                unsigned long end;

                end = start + (memslot->npages << PAGE_SHIFT);
                if (hva >= start && hva < end) {
                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;

                        ret = handler(kvm, &memslot->rmap[gfn_offset], data);

                        for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
                                int idx = gfn_offset;
                                idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
                                ret |= handler(kvm,
                                        &memslot->lpage_info[j][idx].rmap_pde,
                                        data);
                        }
                        trace_kvm_age_page(hva, memslot, ret);
                        retval |= ret;
                }
        }

        return retval;
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
        return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
        kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
}

static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                         unsigned long data)
{
        u64 *spte;
        int young = 0;

        /*
         * Emulate the accessed bit for EPT, by checking if this page has
         * an EPT mapping, and clearing it if it does. On the next access,
         * a new EPT mapping will be established.
         * This has some overhead, but not as much as the cost of swapping
         * out actively used pages or breaking up actively used hugepages.
         */
        if (!shadow_accessed_mask)
                return kvm_unmap_rmapp(kvm, rmapp, data);

        spte = rmap_next(kvm, rmapp, NULL);
        while (spte) {
                int _young;
                u64 _spte = *spte;
                BUG_ON(!(_spte & PT_PRESENT_MASK));
                _young = _spte & PT_ACCESSED_MASK;
                if (_young) {
                        young = 1;
                        clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
                }
                spte = rmap_next(kvm, rmapp, spte);
        }
        return young;
}

#define RMAP_RECYCLE_THRESHOLD 1000

static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{
        unsigned long *rmapp;
        struct kvm_mmu_page *sp;

        sp = page_header(__pa(spte));

        rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);

        kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
        kvm_flush_remote_tlbs(vcpu->kvm);
}

int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
        return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
}

#ifdef MMU_DEBUG
static int is_empty_shadow_page(u64 *spt)
{
        u64 *pos;
        u64 *end;

        for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
                if (is_shadow_present_pte(*pos)) {
                        printk(KERN_ERR "%s: %p %llx\n", __func__,
                               pos, *pos);
                        return 0;
                }
        return 1;
}
#endif

static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
        ASSERT(is_empty_shadow_page(sp->spt));
        hlist_del(&sp->hash_link);
        list_del(&sp->link);
        __free_page(virt_to_page(sp->spt));
        if (!sp->role.direct)
                __free_page(virt_to_page(sp->gfns));
        kmem_cache_free(mmu_page_header_cache, sp);
        ++kvm->arch.n_free_mmu_pages;
}

static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
        return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
}

static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
                                               u64 *parent_pte, int direct)
{
        struct kvm_mmu_page *sp;

        sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp);
        sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
        if (!direct)
                sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
970 | PAGE_SIZE); | 970 | PAGE_SIZE); |
971 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 971 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
972 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 972 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
973 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 973 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
974 | sp->multimapped = 0; | 974 | sp->multimapped = 0; |
975 | sp->parent_pte = parent_pte; | 975 | sp->parent_pte = parent_pte; |
976 | --vcpu->kvm->arch.n_free_mmu_pages; | 976 | --vcpu->kvm->arch.n_free_mmu_pages; |
977 | return sp; | 977 | return sp; |
978 | } | 978 | } |
979 | 979 | ||
980 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, | 980 | static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, |
981 | struct kvm_mmu_page *sp, u64 *parent_pte) | 981 | struct kvm_mmu_page *sp, u64 *parent_pte) |
982 | { | 982 | { |
983 | struct kvm_pte_chain *pte_chain; | 983 | struct kvm_pte_chain *pte_chain; |
984 | struct hlist_node *node; | 984 | struct hlist_node *node; |
985 | int i; | 985 | int i; |
986 | 986 | ||
987 | if (!parent_pte) | 987 | if (!parent_pte) |
988 | return; | 988 | return; |
989 | if (!sp->multimapped) { | 989 | if (!sp->multimapped) { |
990 | u64 *old = sp->parent_pte; | 990 | u64 *old = sp->parent_pte; |
991 | 991 | ||
992 | if (!old) { | 992 | if (!old) { |
993 | sp->parent_pte = parent_pte; | 993 | sp->parent_pte = parent_pte; |
994 | return; | 994 | return; |
995 | } | 995 | } |
996 | sp->multimapped = 1; | 996 | sp->multimapped = 1; |
997 | pte_chain = mmu_alloc_pte_chain(vcpu); | 997 | pte_chain = mmu_alloc_pte_chain(vcpu); |
998 | INIT_HLIST_HEAD(&sp->parent_ptes); | 998 | INIT_HLIST_HEAD(&sp->parent_ptes); |
999 | hlist_add_head(&pte_chain->link, &sp->parent_ptes); | 999 | hlist_add_head(&pte_chain->link, &sp->parent_ptes); |
1000 | pte_chain->parent_ptes[0] = old; | 1000 | pte_chain->parent_ptes[0] = old; |
1001 | } | 1001 | } |
1002 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) { | 1002 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) { |
1003 | if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1]) | 1003 | if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1]) |
1004 | continue; | 1004 | continue; |
1005 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) | 1005 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) |
1006 | if (!pte_chain->parent_ptes[i]) { | 1006 | if (!pte_chain->parent_ptes[i]) { |
1007 | pte_chain->parent_ptes[i] = parent_pte; | 1007 | pte_chain->parent_ptes[i] = parent_pte; |
1008 | return; | 1008 | return; |
1009 | } | 1009 | } |
1010 | } | 1010 | } |
1011 | pte_chain = mmu_alloc_pte_chain(vcpu); | 1011 | pte_chain = mmu_alloc_pte_chain(vcpu); |
1012 | BUG_ON(!pte_chain); | 1012 | BUG_ON(!pte_chain); |
1013 | hlist_add_head(&pte_chain->link, &sp->parent_ptes); | 1013 | hlist_add_head(&pte_chain->link, &sp->parent_ptes); |
1014 | pte_chain->parent_ptes[0] = parent_pte; | 1014 | pte_chain->parent_ptes[0] = parent_pte; |
1015 | } | 1015 | } |
1016 | 1016 | ||
1017 | static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | 1017 | static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, |
1018 | u64 *parent_pte) | 1018 | u64 *parent_pte) |
1019 | { | 1019 | { |
1020 | struct kvm_pte_chain *pte_chain; | 1020 | struct kvm_pte_chain *pte_chain; |
1021 | struct hlist_node *node; | 1021 | struct hlist_node *node; |
1022 | int i; | 1022 | int i; |
1023 | 1023 | ||
1024 | if (!sp->multimapped) { | 1024 | if (!sp->multimapped) { |
1025 | BUG_ON(sp->parent_pte != parent_pte); | 1025 | BUG_ON(sp->parent_pte != parent_pte); |
1026 | sp->parent_pte = NULL; | 1026 | sp->parent_pte = NULL; |
1027 | return; | 1027 | return; |
1028 | } | 1028 | } |
1029 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1029 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
1030 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | 1030 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { |
1031 | if (!pte_chain->parent_ptes[i]) | 1031 | if (!pte_chain->parent_ptes[i]) |
1032 | break; | 1032 | break; |
1033 | if (pte_chain->parent_ptes[i] != parent_pte) | 1033 | if (pte_chain->parent_ptes[i] != parent_pte) |
1034 | continue; | 1034 | continue; |
1035 | while (i + 1 < NR_PTE_CHAIN_ENTRIES | 1035 | while (i + 1 < NR_PTE_CHAIN_ENTRIES |
1036 | && pte_chain->parent_ptes[i + 1]) { | 1036 | && pte_chain->parent_ptes[i + 1]) { |
1037 | pte_chain->parent_ptes[i] | 1037 | pte_chain->parent_ptes[i] |
1038 | = pte_chain->parent_ptes[i + 1]; | 1038 | = pte_chain->parent_ptes[i + 1]; |
1039 | ++i; | 1039 | ++i; |
1040 | } | 1040 | } |
1041 | pte_chain->parent_ptes[i] = NULL; | 1041 | pte_chain->parent_ptes[i] = NULL; |
1042 | if (i == 0) { | 1042 | if (i == 0) { |
1043 | hlist_del(&pte_chain->link); | 1043 | hlist_del(&pte_chain->link); |
1044 | mmu_free_pte_chain(pte_chain); | 1044 | mmu_free_pte_chain(pte_chain); |
1045 | if (hlist_empty(&sp->parent_ptes)) { | 1045 | if (hlist_empty(&sp->parent_ptes)) { |
1046 | sp->multimapped = 0; | 1046 | sp->multimapped = 0; |
1047 | sp->parent_pte = NULL; | 1047 | sp->parent_pte = NULL; |
1048 | } | 1048 | } |
1049 | } | 1049 | } |
1050 | return; | 1050 | return; |
1051 | } | 1051 | } |
1052 | BUG(); | 1052 | BUG(); |
1053 | } | 1053 | } |
1054 | 1054 | ||
1055 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | 1055 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1056 | { | 1056 | { |
1057 | struct kvm_pte_chain *pte_chain; | 1057 | struct kvm_pte_chain *pte_chain; |
1058 | struct hlist_node *node; | 1058 | struct hlist_node *node; |
1059 | struct kvm_mmu_page *parent_sp; | 1059 | struct kvm_mmu_page *parent_sp; |
1060 | int i; | 1060 | int i; |
1061 | 1061 | ||
1062 | if (!sp->multimapped && sp->parent_pte) { | 1062 | if (!sp->multimapped && sp->parent_pte) { |
1063 | parent_sp = page_header(__pa(sp->parent_pte)); | 1063 | parent_sp = page_header(__pa(sp->parent_pte)); |
1064 | fn(parent_sp, sp->parent_pte); | 1064 | fn(parent_sp, sp->parent_pte); |
1065 | return; | 1065 | return; |
1066 | } | 1066 | } |
1067 | 1067 | ||
1068 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1068 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
1069 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | 1069 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { |
1070 | u64 *spte = pte_chain->parent_ptes[i]; | 1070 | u64 *spte = pte_chain->parent_ptes[i]; |
1071 | 1071 | ||
1072 | if (!spte) | 1072 | if (!spte) |
1073 | break; | 1073 | break; |
1074 | parent_sp = page_header(__pa(spte)); | 1074 | parent_sp = page_header(__pa(spte)); |
1075 | fn(parent_sp, spte); | 1075 | fn(parent_sp, spte); |
1076 | } | 1076 | } |
1077 | } | 1077 | } |
1078 | 1078 | ||
1079 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); | 1079 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); |
1080 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | 1080 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
1081 | { | 1081 | { |
1082 | mmu_parent_walk(sp, mark_unsync); | 1082 | mmu_parent_walk(sp, mark_unsync); |
1083 | } | 1083 | } |
1084 | 1084 | ||
1085 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) | 1085 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) |
1086 | { | 1086 | { |
1087 | unsigned int index; | 1087 | unsigned int index; |
1088 | 1088 | ||
1089 | index = spte - sp->spt; | 1089 | index = spte - sp->spt; |
1090 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) | 1090 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) |
1091 | return; | 1091 | return; |
1092 | if (sp->unsync_children++) | 1092 | if (sp->unsync_children++) |
1093 | return; | 1093 | return; |
1094 | kvm_mmu_mark_parents_unsync(sp); | 1094 | kvm_mmu_mark_parents_unsync(sp); |
1095 | } | 1095 | } |
1096 | 1096 | ||
1097 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 1097 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
1098 | struct kvm_mmu_page *sp) | 1098 | struct kvm_mmu_page *sp) |
1099 | { | 1099 | { |
1100 | int i; | 1100 | int i; |
1101 | 1101 | ||
1102 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 1102 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
1103 | sp->spt[i] = shadow_trap_nonpresent_pte; | 1103 | sp->spt[i] = shadow_trap_nonpresent_pte; |
1104 | } | 1104 | } |
1105 | 1105 | ||
1106 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1106 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1107 | struct kvm_mmu_page *sp, bool clear_unsync) | 1107 | struct kvm_mmu_page *sp, bool clear_unsync) |
1108 | { | 1108 | { |
1109 | return 1; | 1109 | return 1; |
1110 | } | 1110 | } |
1111 | 1111 | ||
1112 | static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | 1112 | static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) |
1113 | { | 1113 | { |
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | #define KVM_PAGE_ARRAY_NR 16 | 1116 | #define KVM_PAGE_ARRAY_NR 16 |
1117 | 1117 | ||
1118 | struct kvm_mmu_pages { | 1118 | struct kvm_mmu_pages { |
1119 | struct mmu_page_and_offset { | 1119 | struct mmu_page_and_offset { |
1120 | struct kvm_mmu_page *sp; | 1120 | struct kvm_mmu_page *sp; |
1121 | unsigned int idx; | 1121 | unsigned int idx; |
1122 | } page[KVM_PAGE_ARRAY_NR]; | 1122 | } page[KVM_PAGE_ARRAY_NR]; |
1123 | unsigned int nr; | 1123 | unsigned int nr; |
1124 | }; | 1124 | }; |
1125 | 1125 | ||
1126 | #define for_each_unsync_children(bitmap, idx) \ | 1126 | #define for_each_unsync_children(bitmap, idx) \ |
1127 | for (idx = find_first_bit(bitmap, 512); \ | 1127 | for (idx = find_first_bit(bitmap, 512); \ |
1128 | idx < 512; \ | 1128 | idx < 512; \ |
1129 | idx = find_next_bit(bitmap, 512, idx+1)) | 1129 | idx = find_next_bit(bitmap, 512, idx+1)) |
1130 | 1130 | ||
1131 | static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, | 1131 | static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, |
1132 | int idx) | 1132 | int idx) |
1133 | { | 1133 | { |
1134 | int i; | 1134 | int i; |
1135 | 1135 | ||
1136 | if (sp->unsync) | 1136 | if (sp->unsync) |
1137 | for (i = 0; i < pvec->nr; i++) | 1137 | for (i = 0; i < pvec->nr; i++) |
1138 | if (pvec->page[i].sp == sp) | 1138 | if (pvec->page[i].sp == sp) |
1139 | return 0; | 1139 | return 0; |
1140 | 1140 | ||
1141 | pvec->page[pvec->nr].sp = sp; | 1141 | pvec->page[pvec->nr].sp = sp; |
1142 | pvec->page[pvec->nr].idx = idx; | 1142 | pvec->page[pvec->nr].idx = idx; |
1143 | pvec->nr++; | 1143 | pvec->nr++; |
1144 | return (pvec->nr == KVM_PAGE_ARRAY_NR); | 1144 | return (pvec->nr == KVM_PAGE_ARRAY_NR); |
1145 | } | 1145 | } |
1146 | 1146 | ||
1147 | static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | 1147 | static int __mmu_unsync_walk(struct kvm_mmu_page *sp, |
1148 | struct kvm_mmu_pages *pvec) | 1148 | struct kvm_mmu_pages *pvec) |
1149 | { | 1149 | { |
1150 | int i, ret, nr_unsync_leaf = 0; | 1150 | int i, ret, nr_unsync_leaf = 0; |
1151 | 1151 | ||
1152 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1152 | for_each_unsync_children(sp->unsync_child_bitmap, i) { |
1153 | struct kvm_mmu_page *child; | 1153 | struct kvm_mmu_page *child; |
1154 | u64 ent = sp->spt[i]; | 1154 | u64 ent = sp->spt[i]; |
1155 | 1155 | ||
1156 | if (!is_shadow_present_pte(ent) || is_large_pte(ent)) | 1156 | if (!is_shadow_present_pte(ent) || is_large_pte(ent)) |
1157 | goto clear_child_bitmap; | 1157 | goto clear_child_bitmap; |
1158 | 1158 | ||
1159 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 1159 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
1160 | 1160 | ||
1161 | if (child->unsync_children) { | 1161 | if (child->unsync_children) { |
1162 | if (mmu_pages_add(pvec, child, i)) | 1162 | if (mmu_pages_add(pvec, child, i)) |
1163 | return -ENOSPC; | 1163 | return -ENOSPC; |
1164 | 1164 | ||
1165 | ret = __mmu_unsync_walk(child, pvec); | 1165 | ret = __mmu_unsync_walk(child, pvec); |
1166 | if (!ret) | 1166 | if (!ret) |
1167 | goto clear_child_bitmap; | 1167 | goto clear_child_bitmap; |
1168 | else if (ret > 0) | 1168 | else if (ret > 0) |
1169 | nr_unsync_leaf += ret; | 1169 | nr_unsync_leaf += ret; |
1170 | else | 1170 | else |
1171 | return ret; | 1171 | return ret; |
1172 | } else if (child->unsync) { | 1172 | } else if (child->unsync) { |
1173 | nr_unsync_leaf++; | 1173 | nr_unsync_leaf++; |
1174 | if (mmu_pages_add(pvec, child, i)) | 1174 | if (mmu_pages_add(pvec, child, i)) |
1175 | return -ENOSPC; | 1175 | return -ENOSPC; |
1176 | } else | 1176 | } else |
1177 | goto clear_child_bitmap; | 1177 | goto clear_child_bitmap; |
1178 | 1178 | ||
1179 | continue; | 1179 | continue; |
1180 | 1180 | ||
1181 | clear_child_bitmap: | 1181 | clear_child_bitmap: |
1182 | __clear_bit(i, sp->unsync_child_bitmap); | 1182 | __clear_bit(i, sp->unsync_child_bitmap); |
1183 | sp->unsync_children--; | 1183 | sp->unsync_children--; |
1184 | WARN_ON((int)sp->unsync_children < 0); | 1184 | WARN_ON((int)sp->unsync_children < 0); |
1185 | } | 1185 | } |
1186 | 1186 | ||
1187 | 1187 | ||
1188 | return nr_unsync_leaf; | 1188 | return nr_unsync_leaf; |
1189 | } | 1189 | } |
1190 | 1190 | ||
1191 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | 1191 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, |
1192 | struct kvm_mmu_pages *pvec) | 1192 | struct kvm_mmu_pages *pvec) |
1193 | { | 1193 | { |
1194 | if (!sp->unsync_children) | 1194 | if (!sp->unsync_children) |
1195 | return 0; | 1195 | return 0; |
1196 | 1196 | ||
1197 | mmu_pages_add(pvec, sp, 0); | 1197 | mmu_pages_add(pvec, sp, 0); |
1198 | return __mmu_unsync_walk(sp, pvec); | 1198 | return __mmu_unsync_walk(sp, pvec); |
1199 | } | 1199 | } |
1200 | 1200 | ||
1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1202 | { | 1202 | { |
1203 | WARN_ON(!sp->unsync); | 1203 | WARN_ON(!sp->unsync); |
1204 | trace_kvm_mmu_sync_page(sp); | 1204 | trace_kvm_mmu_sync_page(sp); |
1205 | sp->unsync = 0; | 1205 | sp->unsync = 0; |
1206 | --kvm->stat.mmu_unsync; | 1206 | --kvm->stat.mmu_unsync; |
1207 | } | 1207 | } |
1208 | 1208 | ||
1209 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | 1209 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
1210 | struct list_head *invalid_list); | 1210 | struct list_head *invalid_list); |
1211 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1211 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1212 | struct list_head *invalid_list); | 1212 | struct list_head *invalid_list); |
1213 | 1213 | ||
1214 | #define for_each_gfn_sp(kvm, sp, gfn, pos) \ | 1214 | #define for_each_gfn_sp(kvm, sp, gfn, pos) \ |
1215 | hlist_for_each_entry(sp, pos, \ | 1215 | hlist_for_each_entry(sp, pos, \ |
1216 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1216 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ |
1217 | if ((sp)->gfn != (gfn)) {} else | 1217 | if ((sp)->gfn != (gfn)) {} else |
1218 | 1218 | ||
1219 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ | 1219 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ |
1220 | hlist_for_each_entry(sp, pos, \ | 1220 | hlist_for_each_entry(sp, pos, \ |
1221 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1221 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ |
1222 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | 1222 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ |
1223 | (sp)->role.invalid) {} else | 1223 | (sp)->role.invalid) {} else |
1224 | 1224 | ||
1225 | /* @sp->gfn should be write-protected at the call site */ | 1225 | /* @sp->gfn should be write-protected at the call site */ |
1226 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1226 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
1227 | struct list_head *invalid_list, bool clear_unsync) | 1227 | struct list_head *invalid_list, bool clear_unsync) |
1228 | { | 1228 | { |
1229 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { | 1229 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1230 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); | 1230 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1231 | return 1; | 1231 | return 1; |
1232 | } | 1232 | } |
1233 | 1233 | ||
1234 | if (clear_unsync) | 1234 | if (clear_unsync) |
1235 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1235 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1236 | 1236 | ||
1237 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { | 1237 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { |
1238 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); | 1238 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1239 | return 1; | 1239 | return 1; |
1240 | } | 1240 | } |
1241 | 1241 | ||
1242 | kvm_mmu_flush_tlb(vcpu); | 1242 | kvm_mmu_flush_tlb(vcpu); |
1243 | return 0; | 1243 | return 0; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | 1246 | static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, |
1247 | struct kvm_mmu_page *sp) | 1247 | struct kvm_mmu_page *sp) |
1248 | { | 1248 | { |
1249 | LIST_HEAD(invalid_list); | 1249 | LIST_HEAD(invalid_list); |
1250 | int ret; | 1250 | int ret; |
1251 | 1251 | ||
1252 | ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); | 1252 | ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); |
1253 | if (ret) | 1253 | if (ret) |
1254 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 1254 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
1255 | 1255 | ||
1256 | return ret; | 1256 | return ret; |
1257 | } | 1257 | } |
1258 | 1258 | ||
1259 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1259 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
1260 | struct list_head *invalid_list) | 1260 | struct list_head *invalid_list) |
1261 | { | 1261 | { |
1262 | return __kvm_sync_page(vcpu, sp, invalid_list, true); | 1262 | return __kvm_sync_page(vcpu, sp, invalid_list, true); |
1263 | } | 1263 | } |
1264 | 1264 | ||
1265 | /* @gfn should be write-protected at the call site */ | 1265 | /* @gfn should be write-protected at the call site */ |
1266 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 1266 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) |
1267 | { | 1267 | { |
1268 | struct kvm_mmu_page *s; | 1268 | struct kvm_mmu_page *s; |
1269 | struct hlist_node *node; | 1269 | struct hlist_node *node; |
1270 | LIST_HEAD(invalid_list); | 1270 | LIST_HEAD(invalid_list); |
1271 | bool flush = false; | 1271 | bool flush = false; |
1272 | 1272 | ||
1273 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | 1273 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { |
1274 | if (!s->unsync) | 1274 | if (!s->unsync) |
1275 | continue; | 1275 | continue; |
1276 | 1276 | ||
1277 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | 1277 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); |
1278 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | 1278 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || |
1279 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { | 1279 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { |
1280 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | 1280 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); |
1281 | continue; | 1281 | continue; |
1282 | } | 1282 | } |
1283 | kvm_unlink_unsync_page(vcpu->kvm, s); | 1283 | kvm_unlink_unsync_page(vcpu->kvm, s); |
1284 | flush = true; | 1284 | flush = true; |
1285 | } | 1285 | } |
1286 | 1286 | ||
1287 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 1287 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
1288 | if (flush) | 1288 | if (flush) |
1289 | kvm_mmu_flush_tlb(vcpu); | 1289 | kvm_mmu_flush_tlb(vcpu); |
1290 | } | 1290 | } |
1291 | 1291 | ||
1292 | struct mmu_page_path { | 1292 | struct mmu_page_path { |
1293 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; | 1293 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; |
1294 | unsigned int idx[PT64_ROOT_LEVEL-1]; | 1294 | unsigned int idx[PT64_ROOT_LEVEL-1]; |
1295 | }; | 1295 | }; |
1296 | 1296 | ||
1297 | #define for_each_sp(pvec, sp, parents, i) \ | 1297 | #define for_each_sp(pvec, sp, parents, i) \ |
1298 | for (i = mmu_pages_next(&pvec, &parents, -1), \ | 1298 | for (i = mmu_pages_next(&pvec, &parents, -1), \ |
1299 | sp = pvec.page[i].sp; \ | 1299 | sp = pvec.page[i].sp; \ |
1300 | i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ | 1300 | i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ |
1301 | i = mmu_pages_next(&pvec, &parents, i)) | 1301 | i = mmu_pages_next(&pvec, &parents, i)) |
1302 | 1302 | ||
1303 | static int mmu_pages_next(struct kvm_mmu_pages *pvec, | 1303 | static int mmu_pages_next(struct kvm_mmu_pages *pvec, |
1304 | struct mmu_page_path *parents, | 1304 | struct mmu_page_path *parents, |
1305 | int i) | 1305 | int i) |
1306 | { | 1306 | { |
1307 | int n; | 1307 | int n; |
1308 | 1308 | ||
1309 | for (n = i+1; n < pvec->nr; n++) { | 1309 | for (n = i+1; n < pvec->nr; n++) { |
1310 | struct kvm_mmu_page *sp = pvec->page[n].sp; | 1310 | struct kvm_mmu_page *sp = pvec->page[n].sp; |
1311 | 1311 | ||
1312 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) { | 1312 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) { |
1313 | parents->idx[0] = pvec->page[n].idx; | 1313 | parents->idx[0] = pvec->page[n].idx; |
1314 | return n; | 1314 | return n; |
1315 | } | 1315 | } |
1316 | 1316 | ||
1317 | parents->parent[sp->role.level-2] = sp; | 1317 | parents->parent[sp->role.level-2] = sp; |
1318 | parents->idx[sp->role.level-1] = pvec->page[n].idx; | 1318 | parents->idx[sp->role.level-1] = pvec->page[n].idx; |
1319 | } | 1319 | } |
1320 | 1320 | ||
1321 | return n; | 1321 | return n; |
1322 | } | 1322 | } |
1323 | 1323 | ||
1324 | static void mmu_pages_clear_parents(struct mmu_page_path *parents) | 1324 | static void mmu_pages_clear_parents(struct mmu_page_path *parents) |
1325 | { | 1325 | { |
1326 | struct kvm_mmu_page *sp; | 1326 | struct kvm_mmu_page *sp; |
1327 | unsigned int level = 0; | 1327 | unsigned int level = 0; |
1328 | 1328 | ||
1329 | do { | 1329 | do { |
1330 | unsigned int idx = parents->idx[level]; | 1330 | unsigned int idx = parents->idx[level]; |
1331 | 1331 | ||
1332 | sp = parents->parent[level]; | 1332 | sp = parents->parent[level]; |
1333 | if (!sp) | 1333 | if (!sp) |
1334 | return; | 1334 | return; |
1335 | 1335 | ||
1336 | --sp->unsync_children; | 1336 | --sp->unsync_children; |
1337 | WARN_ON((int)sp->unsync_children < 0); | 1337 | WARN_ON((int)sp->unsync_children < 0); |
1338 | __clear_bit(idx, sp->unsync_child_bitmap); | 1338 | __clear_bit(idx, sp->unsync_child_bitmap); |
1339 | level++; | 1339 | level++; |
1340 | } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); | 1340 | } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); |
1341 | } | 1341 | } |
1342 | 1342 | ||
1343 | static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, | 1343 | static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, |
1344 | struct mmu_page_path *parents, | 1344 | struct mmu_page_path *parents, |
1345 | struct kvm_mmu_pages *pvec) | 1345 | struct kvm_mmu_pages *pvec) |
1346 | { | 1346 | { |
1347 | parents->parent[parent->role.level-1] = NULL; | 1347 | parents->parent[parent->role.level-1] = NULL; |
1348 | pvec->nr = 0; | 1348 | pvec->nr = 0; |
1349 | } | 1349 | } |
1350 | 1350 | ||
1351 | static void mmu_sync_children(struct kvm_vcpu *vcpu, | 1351 | static void mmu_sync_children(struct kvm_vcpu *vcpu, |
1352 | struct kvm_mmu_page *parent) | 1352 | struct kvm_mmu_page *parent) |
1353 | { | 1353 | { |
1354 | int i; | 1354 | int i; |
1355 | struct kvm_mmu_page *sp; | 1355 | struct kvm_mmu_page *sp; |
1356 | struct mmu_page_path parents; | 1356 | struct mmu_page_path parents; |
1357 | struct kvm_mmu_pages pages; | 1357 | struct kvm_mmu_pages pages; |
1358 | LIST_HEAD(invalid_list); | 1358 | LIST_HEAD(invalid_list); |
1359 | 1359 | ||
1360 | kvm_mmu_pages_init(parent, &parents, &pages); | 1360 | kvm_mmu_pages_init(parent, &parents, &pages); |
1361 | while (mmu_unsync_walk(parent, &pages)) { | 1361 | while (mmu_unsync_walk(parent, &pages)) { |
1362 | int protected = 0; | 1362 | int protected = 0; |
1363 | 1363 | ||
1364 | for_each_sp(pages, sp, parents, i) | 1364 | for_each_sp(pages, sp, parents, i) |
1365 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); | 1365 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); |
1366 | 1366 | ||
1367 | if (protected) | 1367 | if (protected) |
1368 | kvm_flush_remote_tlbs(vcpu->kvm); | 1368 | kvm_flush_remote_tlbs(vcpu->kvm); |
1369 | 1369 | ||
1370 | for_each_sp(pages, sp, parents, i) { | 1370 | for_each_sp(pages, sp, parents, i) { |
1371 | kvm_sync_page(vcpu, sp, &invalid_list); | 1371 | kvm_sync_page(vcpu, sp, &invalid_list); |
1372 | mmu_pages_clear_parents(&parents); | 1372 | mmu_pages_clear_parents(&parents); |
1373 | } | 1373 | } |
1374 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 1374 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
1375 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 1375 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
1376 | kvm_mmu_pages_init(parent, &parents, &pages); | 1376 | kvm_mmu_pages_init(parent, &parents, &pages); |
1377 | } | 1377 | } |
1378 | } | 1378 | } |
1379 | 1379 | ||
1380 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1380 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
1381 | gfn_t gfn, | 1381 | gfn_t gfn, |
1382 | gva_t gaddr, | 1382 | gva_t gaddr, |
1383 | unsigned level, | 1383 | unsigned level, |
1384 | int direct, | 1384 | int direct, |
1385 | unsigned access, | 1385 | unsigned access, |
1386 | u64 *parent_pte) | 1386 | u64 *parent_pte) |
1387 | { | 1387 | { |
1388 | union kvm_mmu_page_role role; | 1388 | union kvm_mmu_page_role role; |
1389 | unsigned quadrant; | 1389 | unsigned quadrant; |
1390 | struct kvm_mmu_page *sp; | 1390 | struct kvm_mmu_page *sp; |
1391 | struct hlist_node *node; | 1391 | struct hlist_node *node; |
1392 | bool need_sync = false; | 1392 | bool need_sync = false; |
1393 | 1393 | ||
1394 | role = vcpu->arch.mmu.base_role; | 1394 | role = vcpu->arch.mmu.base_role; |
1395 | role.level = level; | 1395 | role.level = level; |
1396 | role.direct = direct; | 1396 | role.direct = direct; |
1397 | if (role.direct) | 1397 | if (role.direct) |
1398 | role.cr4_pae = 0; | 1398 | role.cr4_pae = 0; |
1399 | role.access = access; | 1399 | role.access = access; |
1400 | if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1400 | if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1401 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1401 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
1402 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 1402 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
1403 | role.quadrant = quadrant; | 1403 | role.quadrant = quadrant; |
1404 | } | 1404 | } |
1405 | for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { | 1405 | for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { |
1406 | if (!need_sync && sp->unsync) | 1406 | if (!need_sync && sp->unsync) |
1407 | need_sync = true; | 1407 | need_sync = true; |
1408 | 1408 | ||
1409 | if (sp->role.word != role.word) | 1409 | if (sp->role.word != role.word) |
1410 | continue; | 1410 | continue; |
1411 | 1411 | ||
1412 | if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) | 1412 | if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) |
1413 | break; | 1413 | break; |
1414 | 1414 | ||
1415 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1415 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1416 | if (sp->unsync_children) { | 1416 | if (sp->unsync_children) { |
1417 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); | 1417 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
1418 | kvm_mmu_mark_parents_unsync(sp); | 1418 | kvm_mmu_mark_parents_unsync(sp); |
1419 | } else if (sp->unsync) | 1419 | } else if (sp->unsync) |
1420 | kvm_mmu_mark_parents_unsync(sp); | 1420 | kvm_mmu_mark_parents_unsync(sp); |
1421 | 1421 | ||
1422 | trace_kvm_mmu_get_page(sp, false); | 1422 | trace_kvm_mmu_get_page(sp, false); |
1423 | return sp; | 1423 | return sp; |
1424 | } | 1424 | } |
1425 | ++vcpu->kvm->stat.mmu_cache_miss; | 1425 | ++vcpu->kvm->stat.mmu_cache_miss; |
1426 | sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); | 1426 | sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); |
1427 | if (!sp) | 1427 | if (!sp) |
1428 | return sp; | 1428 | return sp; |
1429 | sp->gfn = gfn; | 1429 | sp->gfn = gfn; |
1430 | sp->role = role; | 1430 | sp->role = role; |
1431 | hlist_add_head(&sp->hash_link, | 1431 | hlist_add_head(&sp->hash_link, |
1432 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); | 1432 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); |
1433 | if (!direct) { | 1433 | if (!direct) { |
1434 | if (rmap_write_protect(vcpu->kvm, gfn)) | 1434 | if (rmap_write_protect(vcpu->kvm, gfn)) |
1435 | kvm_flush_remote_tlbs(vcpu->kvm); | 1435 | kvm_flush_remote_tlbs(vcpu->kvm); |
1436 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | 1436 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) |
1437 | kvm_sync_pages(vcpu, gfn); | 1437 | kvm_sync_pages(vcpu, gfn); |
1438 | 1438 | ||
1439 | account_shadowed(vcpu->kvm, gfn); | 1439 | account_shadowed(vcpu->kvm, gfn); |
1440 | } | 1440 | } |
1441 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1441 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
1442 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | 1442 | vcpu->arch.mmu.prefetch_page(vcpu, sp); |
1443 | else | 1443 | else |
1444 | nonpaging_prefetch_page(vcpu, sp); | 1444 | nonpaging_prefetch_page(vcpu, sp); |
1445 | trace_kvm_mmu_get_page(sp, true); | 1445 | trace_kvm_mmu_get_page(sp, true); |
1446 | return sp; | 1446 | return sp; |
1447 | } | 1447 | } |
1448 | 1448 | ||
1449 | static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, | 1449 | static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, |
1450 | struct kvm_vcpu *vcpu, u64 addr) | 1450 | struct kvm_vcpu *vcpu, u64 addr) |
1451 | { | 1451 | { |
1452 | iterator->addr = addr; | 1452 | iterator->addr = addr; |
1453 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; | 1453 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; |
1454 | iterator->level = vcpu->arch.mmu.shadow_root_level; | 1454 | iterator->level = vcpu->arch.mmu.shadow_root_level; |
1455 | if (iterator->level == PT32E_ROOT_LEVEL) { | 1455 | if (iterator->level == PT32E_ROOT_LEVEL) { |
1456 | iterator->shadow_addr | 1456 | iterator->shadow_addr |
1457 | = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | 1457 | = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; |
1458 | iterator->shadow_addr &= PT64_BASE_ADDR_MASK; | 1458 | iterator->shadow_addr &= PT64_BASE_ADDR_MASK; |
1459 | --iterator->level; | 1459 | --iterator->level; |
1460 | if (!iterator->shadow_addr) | 1460 | if (!iterator->shadow_addr) |
1461 | iterator->level = 0; | 1461 | iterator->level = 0; |
1462 | } | 1462 | } |
1463 | } | 1463 | } |
1464 | 1464 | ||
1465 | static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) | 1465 | static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) |
1466 | { | 1466 | { |
1467 | if (iterator->level < PT_PAGE_TABLE_LEVEL) | 1467 | if (iterator->level < PT_PAGE_TABLE_LEVEL) |
1468 | return false; | 1468 | return false; |
1469 | 1469 | ||
1470 | if (iterator->level == PT_PAGE_TABLE_LEVEL) | 1470 | if (iterator->level == PT_PAGE_TABLE_LEVEL) |
1471 | if (is_large_pte(*iterator->sptep)) | 1471 | if (is_large_pte(*iterator->sptep)) |
1472 | return false; | 1472 | return false; |
1473 | 1473 | ||
1474 | iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); | 1474 | iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); |
1475 | iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; | 1475 | iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index; |
1476 | return true; | 1476 | return true; |
1477 | } | 1477 | } |
1478 | 1478 | ||
1479 | static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | 1479 | static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) |
1480 | { | 1480 | { |
1481 | iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK; | 1481 | iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK; |
1482 | --iterator->level; | 1482 | --iterator->level; |
1483 | } | 1483 | } |
1484 | 1484 | ||
1485 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1485 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
1486 | struct kvm_mmu_page *sp) | 1486 | struct kvm_mmu_page *sp) |
1487 | { | 1487 | { |
1488 | unsigned i; | 1488 | unsigned i; |
1489 | u64 *pt; | 1489 | u64 *pt; |
1490 | u64 ent; | 1490 | u64 ent; |
1491 | 1491 | ||
1492 | pt = sp->spt; | 1492 | pt = sp->spt; |
1493 | 1493 | ||
1494 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 1494 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
1495 | ent = pt[i]; | 1495 | ent = pt[i]; |
1496 | 1496 | ||
1497 | if (is_shadow_present_pte(ent)) { | 1497 | if (is_shadow_present_pte(ent)) { |
1498 | if (!is_last_spte(ent, sp->role.level)) { | 1498 | if (!is_last_spte(ent, sp->role.level)) { |
1499 | ent &= PT64_BASE_ADDR_MASK; | 1499 | ent &= PT64_BASE_ADDR_MASK; |
1500 | mmu_page_remove_parent_pte(page_header(ent), | 1500 | mmu_page_remove_parent_pte(page_header(ent), |
1501 | &pt[i]); | 1501 | &pt[i]); |
1502 | } else { | 1502 | } else { |
1503 | if (is_large_pte(ent)) | 1503 | if (is_large_pte(ent)) |
1504 | --kvm->stat.lpages; | 1504 | --kvm->stat.lpages; |
1505 | drop_spte(kvm, &pt[i], | 1505 | drop_spte(kvm, &pt[i], |
1506 | shadow_trap_nonpresent_pte); | 1506 | shadow_trap_nonpresent_pte); |
1507 | } | 1507 | } |
1508 | } | 1508 | } |
1509 | pt[i] = shadow_trap_nonpresent_pte; | 1509 | pt[i] = shadow_trap_nonpresent_pte; |
1510 | } | 1510 | } |
1511 | } | 1511 | } |
1512 | 1512 | ||
1513 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | 1513 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) |
1514 | { | 1514 | { |
1515 | mmu_page_remove_parent_pte(sp, parent_pte); | 1515 | mmu_page_remove_parent_pte(sp, parent_pte); |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) | 1518 | static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) |
1519 | { | 1519 | { |
1520 | int i; | 1520 | int i; |
1521 | struct kvm_vcpu *vcpu; | 1521 | struct kvm_vcpu *vcpu; |
1522 | 1522 | ||
1523 | kvm_for_each_vcpu(i, vcpu, kvm) | 1523 | kvm_for_each_vcpu(i, vcpu, kvm) |
1524 | vcpu->arch.last_pte_updated = NULL; | 1524 | vcpu->arch.last_pte_updated = NULL; |
1525 | } | 1525 | } |
1526 | 1526 | ||
1527 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | 1527 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
1528 | { | 1528 | { |
1529 | u64 *parent_pte; | 1529 | u64 *parent_pte; |
1530 | 1530 | ||
1531 | while (sp->multimapped || sp->parent_pte) { | 1531 | while (sp->multimapped || sp->parent_pte) { |
1532 | if (!sp->multimapped) | 1532 | if (!sp->multimapped) |
1533 | parent_pte = sp->parent_pte; | 1533 | parent_pte = sp->parent_pte; |
1534 | else { | 1534 | else { |
1535 | struct kvm_pte_chain *chain; | 1535 | struct kvm_pte_chain *chain; |
1536 | 1536 | ||
1537 | chain = container_of(sp->parent_ptes.first, | 1537 | chain = container_of(sp->parent_ptes.first, |
1538 | struct kvm_pte_chain, link); | 1538 | struct kvm_pte_chain, link); |
1539 | parent_pte = chain->parent_ptes[0]; | 1539 | parent_pte = chain->parent_ptes[0]; |
1540 | } | 1540 | } |
1541 | BUG_ON(!parent_pte); | 1541 | BUG_ON(!parent_pte); |
1542 | kvm_mmu_put_page(sp, parent_pte); | 1542 | kvm_mmu_put_page(sp, parent_pte); |
1543 | __set_spte(parent_pte, shadow_trap_nonpresent_pte); | 1543 | __set_spte(parent_pte, shadow_trap_nonpresent_pte); |
1544 | } | 1544 | } |
1545 | } | 1545 | } |
1546 | 1546 | ||
1547 | static int mmu_zap_unsync_children(struct kvm *kvm, | 1547 | static int mmu_zap_unsync_children(struct kvm *kvm, |
1548 | struct kvm_mmu_page *parent, | 1548 | struct kvm_mmu_page *parent, |
1549 | struct list_head *invalid_list) | 1549 | struct list_head *invalid_list) |
1550 | { | 1550 | { |
1551 | int i, zapped = 0; | 1551 | int i, zapped = 0; |
1552 | struct mmu_page_path parents; | 1552 | struct mmu_page_path parents; |
1553 | struct kvm_mmu_pages pages; | 1553 | struct kvm_mmu_pages pages; |
1554 | 1554 | ||
1555 | if (parent->role.level == PT_PAGE_TABLE_LEVEL) | 1555 | if (parent->role.level == PT_PAGE_TABLE_LEVEL) |
1556 | return 0; | 1556 | return 0; |
1557 | 1557 | ||
1558 | kvm_mmu_pages_init(parent, &parents, &pages); | 1558 | kvm_mmu_pages_init(parent, &parents, &pages); |
1559 | while (mmu_unsync_walk(parent, &pages)) { | 1559 | while (mmu_unsync_walk(parent, &pages)) { |
1560 | struct kvm_mmu_page *sp; | 1560 | struct kvm_mmu_page *sp; |
1561 | 1561 | ||
1562 | for_each_sp(pages, sp, parents, i) { | 1562 | for_each_sp(pages, sp, parents, i) { |
1563 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | 1563 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
1564 | mmu_pages_clear_parents(&parents); | 1564 | mmu_pages_clear_parents(&parents); |
1565 | zapped++; | 1565 | zapped++; |
1566 | } | 1566 | } |
1567 | kvm_mmu_pages_init(parent, &parents, &pages); | 1567 | kvm_mmu_pages_init(parent, &parents, &pages); |
1568 | } | 1568 | } |
1569 | 1569 | ||
1570 | return zapped; | 1570 | return zapped; |
1571 | } | 1571 | } |
1572 | 1572 | ||
1573 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | 1573 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
1574 | struct list_head *invalid_list) | 1574 | struct list_head *invalid_list) |
1575 | { | 1575 | { |
1576 | int ret; | 1576 | int ret; |
1577 | 1577 | ||
1578 | trace_kvm_mmu_prepare_zap_page(sp); | 1578 | trace_kvm_mmu_prepare_zap_page(sp); |
1579 | ++kvm->stat.mmu_shadow_zapped; | 1579 | ++kvm->stat.mmu_shadow_zapped; |
1580 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); | 1580 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); |
1581 | kvm_mmu_page_unlink_children(kvm, sp); | 1581 | kvm_mmu_page_unlink_children(kvm, sp); |
1582 | kvm_mmu_unlink_parents(kvm, sp); | 1582 | kvm_mmu_unlink_parents(kvm, sp); |
1583 | if (!sp->role.invalid && !sp->role.direct) | 1583 | if (!sp->role.invalid && !sp->role.direct) |
1584 | unaccount_shadowed(kvm, sp->gfn); | 1584 | unaccount_shadowed(kvm, sp->gfn); |
1585 | if (sp->unsync) | 1585 | if (sp->unsync) |
1586 | kvm_unlink_unsync_page(kvm, sp); | 1586 | kvm_unlink_unsync_page(kvm, sp); |
1587 | if (!sp->root_count) { | 1587 | if (!sp->root_count) { |
1588 | /* Count self */ | 1588 | /* Count self */ |
1589 | ret++; | 1589 | ret++; |
1590 | list_move(&sp->link, invalid_list); | 1590 | list_move(&sp->link, invalid_list); |
1591 | } else { | 1591 | } else { |
1592 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 1592 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
1593 | kvm_reload_remote_mmus(kvm); | 1593 | kvm_reload_remote_mmus(kvm); |
1594 | } | 1594 | } |
1595 | 1595 | ||
1596 | sp->role.invalid = 1; | 1596 | sp->role.invalid = 1; |
1597 | kvm_mmu_reset_last_pte_updated(kvm); | 1597 | kvm_mmu_reset_last_pte_updated(kvm); |
1598 | return ret; | 1598 | return ret; |
1599 | } | 1599 | } |
1600 | 1600 | ||
1601 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1601 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1602 | struct list_head *invalid_list) | 1602 | struct list_head *invalid_list) |
1603 | { | 1603 | { |
1604 | struct kvm_mmu_page *sp; | 1604 | struct kvm_mmu_page *sp; |
1605 | 1605 | ||
1606 | if (list_empty(invalid_list)) | 1606 | if (list_empty(invalid_list)) |
1607 | return; | 1607 | return; |
1608 | 1608 | ||
1609 | kvm_flush_remote_tlbs(kvm); | 1609 | kvm_flush_remote_tlbs(kvm); |
1610 | 1610 | ||
1611 | do { | 1611 | do { |
1612 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 1612 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); |
1613 | WARN_ON(!sp->role.invalid || sp->root_count); | 1613 | WARN_ON(!sp->role.invalid || sp->root_count); |
1614 | kvm_mmu_free_page(kvm, sp); | 1614 | kvm_mmu_free_page(kvm, sp); |
1615 | } while (!list_empty(invalid_list)); | 1615 | } while (!list_empty(invalid_list)); |
1616 | 1616 | ||
1617 | } | 1617 | } |
1618 | 1618 | ||
1619 | /* | 1619 | /* |
1620 | * Changing the number of mmu pages allocated to the vm | 1620 | * Changing the number of mmu pages allocated to the vm |
1621 | * Note: if kvm_nr_mmu_pages is too small, you will get a deadlock | 1621 | * Note: if kvm_nr_mmu_pages is too small, you will get a deadlock |
1622 | */ | 1622 | */ |
1623 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | 1623 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) |
1624 | { | 1624 | { |
1625 | int used_pages; | 1625 | int used_pages; |
1626 | LIST_HEAD(invalid_list); | 1626 | LIST_HEAD(invalid_list); |
1627 | 1627 | ||
1628 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; | 1628 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; |
1629 | used_pages = max(0, used_pages); | 1629 | used_pages = max(0, used_pages); |
1630 | 1630 | ||
1631 | /* | 1631 | /* |
1632 | * If we set the number of mmu pages to be smaller than the | 1632 | * If we set the number of mmu pages to be smaller than the |
1633 | * number of active pages, we must free some mmu pages before we | 1633 | * number of active pages, we must free some mmu pages before we |
1634 | * change the value | 1634 | * change the value |
1635 | */ | 1635 | */ |
1636 | 1636 | ||
1637 | if (used_pages > kvm_nr_mmu_pages) { | 1637 | if (used_pages > kvm_nr_mmu_pages) { |
1638 | while (used_pages > kvm_nr_mmu_pages && | 1638 | while (used_pages > kvm_nr_mmu_pages && |
1639 | !list_empty(&kvm->arch.active_mmu_pages)) { | 1639 | !list_empty(&kvm->arch.active_mmu_pages)) { |
1640 | struct kvm_mmu_page *page; | 1640 | struct kvm_mmu_page *page; |
1641 | 1641 | ||
1642 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1642 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1643 | struct kvm_mmu_page, link); | 1643 | struct kvm_mmu_page, link); |
1644 | used_pages -= kvm_mmu_prepare_zap_page(kvm, page, | 1644 | used_pages -= kvm_mmu_prepare_zap_page(kvm, page, |
1645 | &invalid_list); | 1645 | &invalid_list); |
1646 | } | 1646 | } |
1647 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 1647 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
1648 | kvm_nr_mmu_pages = used_pages; | 1648 | kvm_nr_mmu_pages = used_pages; |
1649 | kvm->arch.n_free_mmu_pages = 0; | 1649 | kvm->arch.n_free_mmu_pages = 0; |
1650 | } | 1650 | } |
1651 | else | 1651 | else |
1652 | kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages | 1652 | kvm->arch.n_free_mmu_pages += kvm_nr_mmu_pages |
1653 | - kvm->arch.n_alloc_mmu_pages; | 1653 | - kvm->arch.n_alloc_mmu_pages; |
1654 | 1654 | ||
1655 | kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages; | 1655 | kvm->arch.n_alloc_mmu_pages = kvm_nr_mmu_pages; |
1656 | } | 1656 | } |
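A plain-integer sketch of the accounting in kvm_mmu_change_mmu_pages above, with nalloc/nfree standing in for the kvm->arch counters and assuming the zap loop can always trim the used count down to the new limit:

/* nalloc/nfree mirror n_alloc_mmu_pages/n_free_mmu_pages. */
static void sketch_change_mmu_pages(int *nalloc, int *nfree, int new_limit)
{
        int used = *nalloc - *nfree;

        if (used < 0)
                used = 0;

        if (used > new_limit) {
                used = new_limit;       /* stands in for zapping pages one at a time */
                *nfree = 0;
        } else {
                *nfree += new_limit - *nalloc;
        }
        *nalloc = new_limit;
}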
1657 | 1657 | ||
1658 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 1658 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
1659 | { | 1659 | { |
1660 | struct kvm_mmu_page *sp; | 1660 | struct kvm_mmu_page *sp; |
1661 | struct hlist_node *node; | 1661 | struct hlist_node *node; |
1662 | LIST_HEAD(invalid_list); | 1662 | LIST_HEAD(invalid_list); |
1663 | int r; | 1663 | int r; |
1664 | 1664 | ||
1665 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | 1665 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); |
1666 | r = 0; | 1666 | r = 0; |
1667 | 1667 | ||
1668 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 1668 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1669 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1669 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1670 | sp->role.word); | 1670 | sp->role.word); |
1671 | r = 1; | 1671 | r = 1; |
1672 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 1672 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1673 | } | 1673 | } |
1674 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 1674 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
1675 | return r; | 1675 | return r; |
1676 | } | 1676 | } |
1677 | 1677 | ||
1678 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | 1678 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) |
1679 | { | 1679 | { |
1680 | struct kvm_mmu_page *sp; | 1680 | struct kvm_mmu_page *sp; |
1681 | struct hlist_node *node; | 1681 | struct hlist_node *node; |
1682 | LIST_HEAD(invalid_list); | 1682 | LIST_HEAD(invalid_list); |
1683 | 1683 | ||
1684 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { | 1684 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1685 | pgprintk("%s: zap %lx %x\n", | 1685 | pgprintk("%s: zap %lx %x\n", |
1686 | __func__, gfn, sp->role.word); | 1686 | __func__, gfn, sp->role.word); |
1687 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); | 1687 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1688 | } | 1688 | } |
1689 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 1689 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
1690 | } | 1690 | } |
1691 | 1691 | ||
1692 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1692 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
1693 | { | 1693 | { |
1694 | int slot = memslot_id(kvm, gfn); | 1694 | int slot = memslot_id(kvm, gfn); |
1695 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | 1695 | struct kvm_mmu_page *sp = page_header(__pa(pte)); |
1696 | 1696 | ||
1697 | __set_bit(slot, sp->slot_bitmap); | 1697 | __set_bit(slot, sp->slot_bitmap); |
1698 | } | 1698 | } |
1699 | 1699 | ||
1700 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) | 1700 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) |
1701 | { | 1701 | { |
1702 | int i; | 1702 | int i; |
1703 | u64 *pt = sp->spt; | 1703 | u64 *pt = sp->spt; |
1704 | 1704 | ||
1705 | if (shadow_trap_nonpresent_pte == shadow_notrap_nonpresent_pte) | 1705 | if (shadow_trap_nonpresent_pte == shadow_notrap_nonpresent_pte) |
1706 | return; | 1706 | return; |
1707 | 1707 | ||
1708 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 1708 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
1709 | if (pt[i] == shadow_notrap_nonpresent_pte) | 1709 | if (pt[i] == shadow_notrap_nonpresent_pte) |
1710 | __set_spte(&pt[i], shadow_trap_nonpresent_pte); | 1710 | __set_spte(&pt[i], shadow_trap_nonpresent_pte); |
1711 | } | 1711 | } |
1712 | } | 1712 | } |
1713 | 1713 | ||
1714 | /* | 1714 | /* |
1715 | * The function is based on mtrr_type_lookup() in | 1715 | * The function is based on mtrr_type_lookup() in |
1716 | * arch/x86/kernel/cpu/mtrr/generic.c | 1716 | * arch/x86/kernel/cpu/mtrr/generic.c |
1717 | */ | 1717 | */ |
1718 | static int get_mtrr_type(struct mtrr_state_type *mtrr_state, | 1718 | static int get_mtrr_type(struct mtrr_state_type *mtrr_state, |
1719 | u64 start, u64 end) | 1719 | u64 start, u64 end) |
1720 | { | 1720 | { |
1721 | int i; | 1721 | int i; |
1722 | u64 base, mask; | 1722 | u64 base, mask; |
1723 | u8 prev_match, curr_match; | 1723 | u8 prev_match, curr_match; |
1724 | int num_var_ranges = KVM_NR_VAR_MTRR; | 1724 | int num_var_ranges = KVM_NR_VAR_MTRR; |
1725 | 1725 | ||
1726 | if (!mtrr_state->enabled) | 1726 | if (!mtrr_state->enabled) |
1727 | return 0xFF; | 1727 | return 0xFF; |
1728 | 1728 | ||
1729 | /* Make end inclusive, instead of exclusive */ | 1729 | /* Make end inclusive, instead of exclusive */ |
1730 | end--; | 1730 | end--; |
1731 | 1731 | ||
1732 | /* Look in fixed ranges. Just return the type as per start */ | 1732 | /* Look in fixed ranges. Just return the type as per start */ |
1733 | if (mtrr_state->have_fixed && (start < 0x100000)) { | 1733 | if (mtrr_state->have_fixed && (start < 0x100000)) { |
1734 | int idx; | 1734 | int idx; |
1735 | 1735 | ||
1736 | if (start < 0x80000) { | 1736 | if (start < 0x80000) { |
1737 | idx = 0; | 1737 | idx = 0; |
1738 | idx += (start >> 16); | 1738 | idx += (start >> 16); |
1739 | return mtrr_state->fixed_ranges[idx]; | 1739 | return mtrr_state->fixed_ranges[idx]; |
1740 | } else if (start < 0xC0000) { | 1740 | } else if (start < 0xC0000) { |
1741 | idx = 1 * 8; | 1741 | idx = 1 * 8; |
1742 | idx += ((start - 0x80000) >> 14); | 1742 | idx += ((start - 0x80000) >> 14); |
1743 | return mtrr_state->fixed_ranges[idx]; | 1743 | return mtrr_state->fixed_ranges[idx]; |
1744 | } else if (start < 0x1000000) { | 1744 | } else if (start < 0x1000000) { |
1745 | idx = 3 * 8; | 1745 | idx = 3 * 8; |
1746 | idx += ((start - 0xC0000) >> 12); | 1746 | idx += ((start - 0xC0000) >> 12); |
1747 | return mtrr_state->fixed_ranges[idx]; | 1747 | return mtrr_state->fixed_ranges[idx]; |
1748 | } | 1748 | } |
1749 | } | 1749 | } |
1750 | 1750 | ||
1751 | /* | 1751 | /* |
1752 | * Look in variable ranges | 1752 | * Look in variable ranges |
1753 | * Look for multiple ranges matching this address and pick the type | 1753 | * Look for multiple ranges matching this address and pick the type |
1754 | * as per MTRR precedence | 1754 | * as per MTRR precedence |
1755 | */ | 1755 | */ |
1756 | if (!(mtrr_state->enabled & 2)) | 1756 | if (!(mtrr_state->enabled & 2)) |
1757 | return mtrr_state->def_type; | 1757 | return mtrr_state->def_type; |
1758 | 1758 | ||
1759 | prev_match = 0xFF; | 1759 | prev_match = 0xFF; |
1760 | for (i = 0; i < num_var_ranges; ++i) { | 1760 | for (i = 0; i < num_var_ranges; ++i) { |
1761 | unsigned short start_state, end_state; | 1761 | unsigned short start_state, end_state; |
1762 | 1762 | ||
1763 | if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) | 1763 | if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) |
1764 | continue; | 1764 | continue; |
1765 | 1765 | ||
1766 | base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + | 1766 | base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + |
1767 | (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); | 1767 | (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); |
1768 | mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + | 1768 | mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + |
1769 | (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); | 1769 | (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); |
1770 | 1770 | ||
1771 | start_state = ((start & mask) == (base & mask)); | 1771 | start_state = ((start & mask) == (base & mask)); |
1772 | end_state = ((end & mask) == (base & mask)); | 1772 | end_state = ((end & mask) == (base & mask)); |
1773 | if (start_state != end_state) | 1773 | if (start_state != end_state) |
1774 | return 0xFE; | 1774 | return 0xFE; |
1775 | 1775 | ||
1776 | if ((start & mask) != (base & mask)) | 1776 | if ((start & mask) != (base & mask)) |
1777 | continue; | 1777 | continue; |
1778 | 1778 | ||
1779 | curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; | 1779 | curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; |
1780 | if (prev_match == 0xFF) { | 1780 | if (prev_match == 0xFF) { |
1781 | prev_match = curr_match; | 1781 | prev_match = curr_match; |
1782 | continue; | 1782 | continue; |
1783 | } | 1783 | } |
1784 | 1784 | ||
1785 | if (prev_match == MTRR_TYPE_UNCACHABLE || | 1785 | if (prev_match == MTRR_TYPE_UNCACHABLE || |
1786 | curr_match == MTRR_TYPE_UNCACHABLE) | 1786 | curr_match == MTRR_TYPE_UNCACHABLE) |
1787 | return MTRR_TYPE_UNCACHABLE; | 1787 | return MTRR_TYPE_UNCACHABLE; |
1788 | 1788 | ||
1789 | if ((prev_match == MTRR_TYPE_WRBACK && | 1789 | if ((prev_match == MTRR_TYPE_WRBACK && |
1790 | curr_match == MTRR_TYPE_WRTHROUGH) || | 1790 | curr_match == MTRR_TYPE_WRTHROUGH) || |
1791 | (prev_match == MTRR_TYPE_WRTHROUGH && | 1791 | (prev_match == MTRR_TYPE_WRTHROUGH && |
1792 | curr_match == MTRR_TYPE_WRBACK)) { | 1792 | curr_match == MTRR_TYPE_WRBACK)) { |
1793 | prev_match = MTRR_TYPE_WRTHROUGH; | 1793 | prev_match = MTRR_TYPE_WRTHROUGH; |
1794 | curr_match = MTRR_TYPE_WRTHROUGH; | 1794 | curr_match = MTRR_TYPE_WRTHROUGH; |
1795 | } | 1795 | } |
1796 | 1796 | ||
1797 | if (prev_match != curr_match) | 1797 | if (prev_match != curr_match) |
1798 | return MTRR_TYPE_UNCACHABLE; | 1798 | return MTRR_TYPE_UNCACHABLE; |
1799 | } | 1799 | } |
1800 | 1800 | ||
1801 | if (prev_match != 0xFF) | 1801 | if (prev_match != 0xFF) |
1802 | return prev_match; | 1802 | return prev_match; |
1803 | 1803 | ||
1804 | return mtrr_state->def_type; | 1804 | return mtrr_state->def_type; |
1805 | } | 1805 | } |
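The variable-range loop above resolves overlapping ranges by a small precedence rule; a sketch of just that rule, using literals that mirror the MTRR_TYPE_* values (0 = UC, 4 = WT, 6 = WB):

/* Combine the types of two overlapping variable ranges:
 * UC beats everything, WT beats WB, any other mismatch degrades to UC. */
static unsigned char sketch_combine_mtrr_types(unsigned char prev, unsigned char curr)
{
        if (prev == 0 || curr == 0)                     /* MTRR_TYPE_UNCACHABLE */
                return 0;
        if ((prev == 6 && curr == 4) || (prev == 4 && curr == 6))
                return 4;                               /* WRBACK + WRTHROUGH -> WRTHROUGH */
        if (prev != curr)
                return 0;
        return prev;
}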
1806 | 1806 | ||
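/*
 * Illustrative sketch (not part of this commit): how one MTRR variable
 * range decides whether a physical address belongs to it.  "base" and
 * "mask" correspond to the var_ranges[] fields assembled above; the
 * helper name itself is made up for this example.
 */
static inline int example_mtrr_range_matches(u64 base, u64 mask, u64 addr)
{
	/* The range covers addr when the masked bits agree with the base. */
	return (addr & mask) == (base & mask);
}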
1807 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | 1807 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) |
1808 | { | 1808 | { |
1809 | u8 mtrr; | 1809 | u8 mtrr; |
1810 | 1810 | ||
1811 | mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, | 1811 | mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, |
1812 | (gfn << PAGE_SHIFT) + PAGE_SIZE); | 1812 | (gfn << PAGE_SHIFT) + PAGE_SIZE); |
1813 | if (mtrr == 0xfe || mtrr == 0xff) | 1813 | if (mtrr == 0xfe || mtrr == 0xff) |
1814 | mtrr = MTRR_TYPE_WRBACK; | 1814 | mtrr = MTRR_TYPE_WRBACK; |
1815 | return mtrr; | 1815 | return mtrr; |
1816 | } | 1816 | } |
1817 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | 1817 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); |
1818 | 1818 | ||
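/*
 * Hedged usage sketch: an EPT-capable backend can fold the MTRR-derived
 * type returned by kvm_get_guest_memory_type() into the memory-type field
 * of its page-table entries.  The shift constant below is assumed here for
 * illustration only; the real definition lives in vmx.h.
 */
static u64 example_ept_memtype_bits(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u8 type = kvm_get_guest_memory_type(vcpu, gfn);

	return (u64)type << VMX_EPT_MT_EPTE_SHIFT;	/* assumed constant */
}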
1819 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1819 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1820 | { | 1820 | { |
1821 | trace_kvm_mmu_unsync_page(sp); | 1821 | trace_kvm_mmu_unsync_page(sp); |
1822 | ++vcpu->kvm->stat.mmu_unsync; | 1822 | ++vcpu->kvm->stat.mmu_unsync; |
1823 | sp->unsync = 1; | 1823 | sp->unsync = 1; |
1824 | 1824 | ||
1825 | kvm_mmu_mark_parents_unsync(sp); | 1825 | kvm_mmu_mark_parents_unsync(sp); |
1826 | mmu_convert_notrap(sp); | 1826 | mmu_convert_notrap(sp); |
1827 | } | 1827 | } |
1828 | 1828 | ||
1829 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | 1829 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) |
1830 | { | 1830 | { |
1831 | struct kvm_mmu_page *s; | 1831 | struct kvm_mmu_page *s; |
1832 | struct hlist_node *node; | 1832 | struct hlist_node *node; |
1833 | 1833 | ||
1834 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | 1834 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { |
1835 | if (s->unsync) | 1835 | if (s->unsync) |
1836 | continue; | 1836 | continue; |
1837 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | 1837 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); |
1838 | __kvm_unsync_page(vcpu, s); | 1838 | __kvm_unsync_page(vcpu, s); |
1839 | } | 1839 | } |
1840 | } | 1840 | } |
1841 | 1841 | ||
1842 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | 1842 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
1843 | bool can_unsync) | 1843 | bool can_unsync) |
1844 | { | 1844 | { |
1845 | struct kvm_mmu_page *s; | 1845 | struct kvm_mmu_page *s; |
1846 | struct hlist_node *node; | 1846 | struct hlist_node *node; |
1847 | bool need_unsync = false; | 1847 | bool need_unsync = false; |
1848 | 1848 | ||
1849 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | 1849 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { |
1850 | if (!can_unsync) | 1850 | if (!can_unsync) |
1851 | return 1; | 1851 | return 1; |
1852 | 1852 | ||
1853 | if (s->role.level != PT_PAGE_TABLE_LEVEL) | 1853 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
1854 | return 1; | 1854 | return 1; |
1855 | 1855 | ||
1856 | if (!need_unsync && !s->unsync) { | 1856 | if (!need_unsync && !s->unsync) { |
1857 | if (!oos_shadow) | 1857 | if (!oos_shadow) |
1858 | return 1; | 1858 | return 1; |
1859 | need_unsync = true; | 1859 | need_unsync = true; |
1860 | } | 1860 | } |
1861 | } | 1861 | } |
1862 | if (need_unsync) | 1862 | if (need_unsync) |
1863 | kvm_unsync_pages(vcpu, gfn); | 1863 | kvm_unsync_pages(vcpu, gfn); |
1864 | return 0; | 1864 | return 0; |
1865 | } | 1865 | } |
1866 | 1866 | ||
1867 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 1867 | static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
1868 | unsigned pte_access, int user_fault, | 1868 | unsigned pte_access, int user_fault, |
1869 | int write_fault, int dirty, int level, | 1869 | int write_fault, int dirty, int level, |
1870 | gfn_t gfn, pfn_t pfn, bool speculative, | 1870 | gfn_t gfn, pfn_t pfn, bool speculative, |
1871 | bool can_unsync, bool reset_host_protection) | 1871 | bool can_unsync, bool reset_host_protection) |
1872 | { | 1872 | { |
1873 | u64 spte; | 1873 | u64 spte; |
1874 | int ret = 0; | 1874 | int ret = 0; |
1875 | 1875 | ||
1876 | /* | 1876 | /* |
1877 | * We don't set the accessed bit, since we sometimes want to see | 1877 | * We don't set the accessed bit, since we sometimes want to see |
1878 | * whether the guest actually used the pte (in order to detect | 1878 | * whether the guest actually used the pte (in order to detect |
1879 | * demand paging). | 1879 | * demand paging). |
1880 | */ | 1880 | */ |
1881 | spte = shadow_base_present_pte | shadow_dirty_mask; | 1881 | spte = shadow_base_present_pte | shadow_dirty_mask; |
1882 | if (!speculative) | 1882 | if (!speculative) |
1883 | spte |= shadow_accessed_mask; | 1883 | spte |= shadow_accessed_mask; |
1884 | if (!dirty) | 1884 | if (!dirty) |
1885 | pte_access &= ~ACC_WRITE_MASK; | 1885 | pte_access &= ~ACC_WRITE_MASK; |
1886 | if (pte_access & ACC_EXEC_MASK) | 1886 | if (pte_access & ACC_EXEC_MASK) |
1887 | spte |= shadow_x_mask; | 1887 | spte |= shadow_x_mask; |
1888 | else | 1888 | else |
1889 | spte |= shadow_nx_mask; | 1889 | spte |= shadow_nx_mask; |
1890 | if (pte_access & ACC_USER_MASK) | 1890 | if (pte_access & ACC_USER_MASK) |
1891 | spte |= shadow_user_mask; | 1891 | spte |= shadow_user_mask; |
1892 | if (level > PT_PAGE_TABLE_LEVEL) | 1892 | if (level > PT_PAGE_TABLE_LEVEL) |
1893 | spte |= PT_PAGE_SIZE_MASK; | 1893 | spte |= PT_PAGE_SIZE_MASK; |
1894 | if (tdp_enabled) | 1894 | if (tdp_enabled) |
1895 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 1895 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1896 | kvm_is_mmio_pfn(pfn)); | 1896 | kvm_is_mmio_pfn(pfn)); |
1897 | 1897 | ||
1898 | if (reset_host_protection) | 1898 | if (reset_host_protection) |
1899 | spte |= SPTE_HOST_WRITEABLE; | 1899 | spte |= SPTE_HOST_WRITEABLE; |
1900 | 1900 | ||
1901 | spte |= (u64)pfn << PAGE_SHIFT; | 1901 | spte |= (u64)pfn << PAGE_SHIFT; |
1902 | 1902 | ||
1903 | if ((pte_access & ACC_WRITE_MASK) | 1903 | if ((pte_access & ACC_WRITE_MASK) |
1904 | || (!tdp_enabled && write_fault && !is_write_protection(vcpu) | 1904 | || (!tdp_enabled && write_fault && !is_write_protection(vcpu) |
1905 | && !user_fault)) { | 1905 | && !user_fault)) { |
1906 | 1906 | ||
1907 | if (level > PT_PAGE_TABLE_LEVEL && | 1907 | if (level > PT_PAGE_TABLE_LEVEL && |
1908 | has_wrprotected_page(vcpu->kvm, gfn, level)) { | 1908 | has_wrprotected_page(vcpu->kvm, gfn, level)) { |
1909 | ret = 1; | 1909 | ret = 1; |
1910 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | 1910 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
1911 | goto done; | 1911 | goto done; |
1912 | } | 1912 | } |
1913 | 1913 | ||
1914 | spte |= PT_WRITABLE_MASK; | 1914 | spte |= PT_WRITABLE_MASK; |
1915 | 1915 | ||
1916 | if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK)) | 1916 | if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK)) |
1917 | spte &= ~PT_USER_MASK; | 1917 | spte &= ~PT_USER_MASK; |
1918 | 1918 | ||
1919 | /* | 1919 | /* |
1920 | * Optimization: for pte sync, if spte was writable the hash | 1920 | * Optimization: for pte sync, if spte was writable the hash |
1921 | * lookup is unnecessary (and expensive). Write protection | 1921 | * lookup is unnecessary (and expensive). Write protection |
1922 | * is responsibility of mmu_get_page / kvm_sync_page. | 1922 | * is responsibility of mmu_get_page / kvm_sync_page. |
1923 | * Same reasoning can be applied to dirty page accounting. | 1923 | * Same reasoning can be applied to dirty page accounting. |
1924 | */ | 1924 | */ |
1925 | if (!can_unsync && is_writable_pte(*sptep)) | 1925 | if (!can_unsync && is_writable_pte(*sptep)) |
1926 | goto set_pte; | 1926 | goto set_pte; |
1927 | 1927 | ||
1928 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 1928 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
1929 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 1929 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
1930 | __func__, gfn); | 1930 | __func__, gfn); |
1931 | ret = 1; | 1931 | ret = 1; |
1932 | pte_access &= ~ACC_WRITE_MASK; | 1932 | pte_access &= ~ACC_WRITE_MASK; |
1933 | if (is_writable_pte(spte)) | 1933 | if (is_writable_pte(spte)) |
1934 | spte &= ~PT_WRITABLE_MASK; | 1934 | spte &= ~PT_WRITABLE_MASK; |
1935 | } | 1935 | } |
1936 | } | 1936 | } |
1937 | 1937 | ||
1938 | if (pte_access & ACC_WRITE_MASK) | 1938 | if (pte_access & ACC_WRITE_MASK) |
1939 | mark_page_dirty(vcpu->kvm, gfn); | 1939 | mark_page_dirty(vcpu->kvm, gfn); |
1940 | 1940 | ||
1941 | set_pte: | 1941 | set_pte: |
1942 | update_spte(sptep, spte); | 1942 | update_spte(sptep, spte); |
1943 | done: | 1943 | done: |
1944 | return ret; | 1944 | return ret; |
1945 | } | 1945 | } |
1946 | 1946 | ||
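/*
 * Minimal sketch of the spte composition performed by set_spte() above:
 * the host pfn goes into the address bits and the permission masks are
 * OR-ed in.  The mask names are the ones used above; the helper itself is
 * only an illustration, not a function in this file.
 */
static u64 example_compose_spte(pfn_t pfn, bool writable, bool exec)
{
	u64 spte = shadow_base_present_pte | shadow_dirty_mask;

	spte |= (u64)pfn << PAGE_SHIFT;
	spte |= exec ? shadow_x_mask : shadow_nx_mask;
	if (writable)
		spte |= PT_WRITABLE_MASK;
	return spte;
}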
1947 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | 1947 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, |
1948 | unsigned pt_access, unsigned pte_access, | 1948 | unsigned pt_access, unsigned pte_access, |
1949 | int user_fault, int write_fault, int dirty, | 1949 | int user_fault, int write_fault, int dirty, |
1950 | int *ptwrite, int level, gfn_t gfn, | 1950 | int *ptwrite, int level, gfn_t gfn, |
1951 | pfn_t pfn, bool speculative, | 1951 | pfn_t pfn, bool speculative, |
1952 | bool reset_host_protection) | 1952 | bool reset_host_protection) |
1953 | { | 1953 | { |
1954 | int was_rmapped = 0; | 1954 | int was_rmapped = 0; |
1955 | int was_writable = is_writable_pte(*sptep); | 1955 | int was_writable = is_writable_pte(*sptep); |
1956 | int rmap_count; | 1956 | int rmap_count; |
1957 | 1957 | ||
1958 | pgprintk("%s: spte %llx access %x write_fault %d" | 1958 | pgprintk("%s: spte %llx access %x write_fault %d" |
1959 | " user_fault %d gfn %lx\n", | 1959 | " user_fault %d gfn %lx\n", |
1960 | __func__, *sptep, pt_access, | 1960 | __func__, *sptep, pt_access, |
1961 | write_fault, user_fault, gfn); | 1961 | write_fault, user_fault, gfn); |
1962 | 1962 | ||
1963 | if (is_rmap_spte(*sptep)) { | 1963 | if (is_rmap_spte(*sptep)) { |
1964 | /* | 1964 | /* |
1965 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink | 1965 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink |
1966 | * the parent of the now unreachable PTE. | 1966 | * the parent of the now unreachable PTE. |
1967 | */ | 1967 | */ |
1968 | if (level > PT_PAGE_TABLE_LEVEL && | 1968 | if (level > PT_PAGE_TABLE_LEVEL && |
1969 | !is_large_pte(*sptep)) { | 1969 | !is_large_pte(*sptep)) { |
1970 | struct kvm_mmu_page *child; | 1970 | struct kvm_mmu_page *child; |
1971 | u64 pte = *sptep; | 1971 | u64 pte = *sptep; |
1972 | 1972 | ||
1973 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1973 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
1974 | mmu_page_remove_parent_pte(child, sptep); | 1974 | mmu_page_remove_parent_pte(child, sptep); |
1975 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 1975 | __set_spte(sptep, shadow_trap_nonpresent_pte); |
1976 | kvm_flush_remote_tlbs(vcpu->kvm); | 1976 | kvm_flush_remote_tlbs(vcpu->kvm); |
1977 | } else if (pfn != spte_to_pfn(*sptep)) { | 1977 | } else if (pfn != spte_to_pfn(*sptep)) { |
1978 | pgprintk("hfn old %lx new %lx\n", | 1978 | pgprintk("hfn old %lx new %lx\n", |
1979 | spte_to_pfn(*sptep), pfn); | 1979 | spte_to_pfn(*sptep), pfn); |
1980 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | 1980 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
1981 | kvm_flush_remote_tlbs(vcpu->kvm); | 1981 | kvm_flush_remote_tlbs(vcpu->kvm); |
1982 | } else | 1982 | } else |
1983 | was_rmapped = 1; | 1983 | was_rmapped = 1; |
1984 | } | 1984 | } |
1985 | 1985 | ||
1986 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 1986 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
1987 | dirty, level, gfn, pfn, speculative, true, | 1987 | dirty, level, gfn, pfn, speculative, true, |
1988 | reset_host_protection)) { | 1988 | reset_host_protection)) { |
1989 | if (write_fault) | 1989 | if (write_fault) |
1990 | *ptwrite = 1; | 1990 | *ptwrite = 1; |
1991 | kvm_mmu_flush_tlb(vcpu); | 1991 | kvm_mmu_flush_tlb(vcpu); |
1992 | } | 1992 | } |
1993 | 1993 | ||
1994 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 1994 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
1995 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", | 1995 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", |
1996 | is_large_pte(*sptep)? "2MB" : "4kB", | 1996 | is_large_pte(*sptep)? "2MB" : "4kB", |
1997 | *sptep & PT_PRESENT_MASK ?"RW":"R", gfn, | 1997 | *sptep & PT_PRESENT_MASK ?"RW":"R", gfn, |
1998 | *sptep, sptep); | 1998 | *sptep, sptep); |
1999 | if (!was_rmapped && is_large_pte(*sptep)) | 1999 | if (!was_rmapped && is_large_pte(*sptep)) |
2000 | ++vcpu->kvm->stat.lpages; | 2000 | ++vcpu->kvm->stat.lpages; |
2001 | 2001 | ||
2002 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 2002 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
2003 | if (!was_rmapped) { | 2003 | if (!was_rmapped) { |
2004 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2004 | rmap_count = rmap_add(vcpu, sptep, gfn); |
2005 | kvm_release_pfn_clean(pfn); | 2005 | kvm_release_pfn_clean(pfn); |
2006 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2006 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
2007 | rmap_recycle(vcpu, sptep, gfn); | 2007 | rmap_recycle(vcpu, sptep, gfn); |
2008 | } else { | 2008 | } else { |
2009 | if (was_writable) | 2009 | if (was_writable) |
2010 | kvm_release_pfn_dirty(pfn); | 2010 | kvm_release_pfn_dirty(pfn); |
2011 | else | 2011 | else |
2012 | kvm_release_pfn_clean(pfn); | 2012 | kvm_release_pfn_clean(pfn); |
2013 | } | 2013 | } |
2014 | if (speculative) { | 2014 | if (speculative) { |
2015 | vcpu->arch.last_pte_updated = sptep; | 2015 | vcpu->arch.last_pte_updated = sptep; |
2016 | vcpu->arch.last_pte_gfn = gfn; | 2016 | vcpu->arch.last_pte_gfn = gfn; |
2017 | } | 2017 | } |
2018 | } | 2018 | } |
2019 | 2019 | ||
2020 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 2020 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
2021 | { | 2021 | { |
2022 | } | 2022 | } |
2023 | 2023 | ||
2024 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | 2024 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, |
2025 | int level, gfn_t gfn, pfn_t pfn) | 2025 | int level, gfn_t gfn, pfn_t pfn) |
2026 | { | 2026 | { |
2027 | struct kvm_shadow_walk_iterator iterator; | 2027 | struct kvm_shadow_walk_iterator iterator; |
2028 | struct kvm_mmu_page *sp; | 2028 | struct kvm_mmu_page *sp; |
2029 | int pt_write = 0; | 2029 | int pt_write = 0; |
2030 | gfn_t pseudo_gfn; | 2030 | gfn_t pseudo_gfn; |
2031 | 2031 | ||
2032 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2032 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2033 | if (iterator.level == level) { | 2033 | if (iterator.level == level) { |
2034 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 2034 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
2035 | 0, write, 1, &pt_write, | 2035 | 0, write, 1, &pt_write, |
2036 | level, gfn, pfn, false, true); | 2036 | level, gfn, pfn, false, true); |
2037 | ++vcpu->stat.pf_fixed; | 2037 | ++vcpu->stat.pf_fixed; |
2038 | break; | 2038 | break; |
2039 | } | 2039 | } |
2040 | 2040 | ||
2041 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { | 2041 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { |
2042 | u64 base_addr = iterator.addr; | 2042 | u64 base_addr = iterator.addr; |
2043 | 2043 | ||
2044 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); | 2044 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); |
2045 | pseudo_gfn = base_addr >> PAGE_SHIFT; | 2045 | pseudo_gfn = base_addr >> PAGE_SHIFT; |
2046 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, | 2046 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, |
2047 | iterator.level - 1, | 2047 | iterator.level - 1, |
2048 | 1, ACC_ALL, iterator.sptep); | 2048 | 1, ACC_ALL, iterator.sptep); |
2049 | if (!sp) { | 2049 | if (!sp) { |
2050 | pgprintk("nonpaging_map: ENOMEM\n"); | 2050 | pgprintk("nonpaging_map: ENOMEM\n"); |
2051 | kvm_release_pfn_clean(pfn); | 2051 | kvm_release_pfn_clean(pfn); |
2052 | return -ENOMEM; | 2052 | return -ENOMEM; |
2053 | } | 2053 | } |
2054 | 2054 | ||
2055 | __set_spte(iterator.sptep, | 2055 | __set_spte(iterator.sptep, |
2056 | __pa(sp->spt) | 2056 | __pa(sp->spt) |
2057 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | 2057 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
2058 | | shadow_user_mask | shadow_x_mask); | 2058 | | shadow_user_mask | shadow_x_mask); |
2059 | } | 2059 | } |
2060 | } | 2060 | } |
2061 | return pt_write; | 2061 | return pt_write; |
2062 | } | 2062 | } |
2063 | 2063 | ||
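/*
 * Sketch of the intermediate-level address math used by __direct_map():
 * when a non-leaf entry is missing, the fault address is truncated to the
 * start of the region that the next-level table will cover, and that
 * truncated address names the shadow page to allocate.  Helper name is
 * illustrative; the macro is the one used above.
 */
static gfn_t example_pseudo_gfn(u64 fault_addr, int level)
{
	return (fault_addr & PT64_LVL_ADDR_MASK(level)) >> PAGE_SHIFT;
}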
2064 | static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) | 2064 | static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) |
2065 | { | 2065 | { |
2066 | char buf[1]; | 2066 | char buf[1]; |
2067 | void __user *hva; | 2067 | void __user *hva; |
2068 | int r; | 2068 | int r; |
2069 | 2069 | ||
2070 | /* Touch the page, so send SIGBUS */ | 2070 | /* Touch the page, so send SIGBUS */ |
2071 | hva = (void __user *)gfn_to_hva(kvm, gfn); | 2071 | hva = (void __user *)gfn_to_hva(kvm, gfn); |
2072 | r = copy_from_user(buf, hva, 1); | 2072 | r = copy_from_user(buf, hva, 1); |
2073 | } | 2073 | } |
2074 | 2074 | ||
2075 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | 2075 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) |
2076 | { | 2076 | { |
2077 | kvm_release_pfn_clean(pfn); | 2077 | kvm_release_pfn_clean(pfn); |
2078 | if (is_hwpoison_pfn(pfn)) { | 2078 | if (is_hwpoison_pfn(pfn)) { |
2079 | kvm_send_hwpoison_signal(kvm, gfn); | 2079 | kvm_send_hwpoison_signal(kvm, gfn); |
2080 | return 0; | 2080 | return 0; |
2081 | } | 2081 | } else if (is_fault_pfn(pfn)) |
| | 2082 | return -EFAULT; |
| | 2083 | |
2082 | return 1; | 2084 | return 1; |
2083 | } | 2085 | } |
2084 | 2086 | ||
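/*
 * Sketch of how a fault path consumes the return value of
 * kvm_handle_bad_page() after this change: a negative value (here -EFAULT)
 * propagates out of the vcpu ioctl, 1 still means "handle the access as
 * MMIO", and 0 means the fault was dealt with (e.g. SIGBUS queued for a
 * hardware-poisoned page).  The helper below is illustrative only.
 */
static int example_consume_bad_page_result(int r)
{
	if (r < 0)
		return r;	/* error code, e.g. -EFAULT, reaches user space */
	if (r == 1)
		return 1;	/* fall back to MMIO emulation */
	return 0;		/* resolved, resume the guest */
}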
2085 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 2087 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
2086 | { | 2088 | { |
2087 | int r; | 2089 | int r; |
2088 | int level; | 2090 | int level; |
2089 | pfn_t pfn; | 2091 | pfn_t pfn; |
2090 | unsigned long mmu_seq; | 2092 | unsigned long mmu_seq; |
2091 | 2093 | ||
2092 | level = mapping_level(vcpu, gfn); | 2094 | level = mapping_level(vcpu, gfn); |
2093 | 2095 | ||
2094 | /* | 2096 | /* |
2095 | * This path builds a PAE pagetable - so we can map 2mb pages at | 2097 | * This path builds a PAE pagetable - so we can map 2mb pages at |
2096 | * maximum. Therefore check if the level is larger than that. | 2098 | * maximum. Therefore check if the level is larger than that. |
2097 | */ | 2099 | */ |
2098 | if (level > PT_DIRECTORY_LEVEL) | 2100 | if (level > PT_DIRECTORY_LEVEL) |
2099 | level = PT_DIRECTORY_LEVEL; | 2101 | level = PT_DIRECTORY_LEVEL; |
2100 | 2102 | ||
2101 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2103 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2102 | 2104 | ||
2103 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2105 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2104 | smp_rmb(); | 2106 | smp_rmb(); |
2105 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2107 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
2106 | 2108 | ||
2107 | /* mmio */ | 2109 | /* mmio */ |
2108 | if (is_error_pfn(pfn)) | 2110 | if (is_error_pfn(pfn)) |
2109 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); | 2111 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
2110 | 2112 | ||
2111 | spin_lock(&vcpu->kvm->mmu_lock); | 2113 | spin_lock(&vcpu->kvm->mmu_lock); |
2112 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2114 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2113 | goto out_unlock; | 2115 | goto out_unlock; |
2114 | kvm_mmu_free_some_pages(vcpu); | 2116 | kvm_mmu_free_some_pages(vcpu); |
2115 | r = __direct_map(vcpu, v, write, level, gfn, pfn); | 2117 | r = __direct_map(vcpu, v, write, level, gfn, pfn); |
2116 | spin_unlock(&vcpu->kvm->mmu_lock); | 2118 | spin_unlock(&vcpu->kvm->mmu_lock); |
2117 | 2119 | ||
2118 | 2120 | ||
2119 | return r; | 2121 | return r; |
2120 | 2122 | ||
2121 | out_unlock: | 2123 | out_unlock: |
2122 | spin_unlock(&vcpu->kvm->mmu_lock); | 2124 | spin_unlock(&vcpu->kvm->mmu_lock); |
2123 | kvm_release_pfn_clean(pfn); | 2125 | kvm_release_pfn_clean(pfn); |
2124 | return 0; | 2126 | return 0; |
2125 | } | 2127 | } |
2126 | 2128 | ||
2127 | 2129 | ||
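/*
 * Condensed sketch of the mmu_notifier race check used in nonpaging_map()
 * and tdp_page_fault(): sample the sequence count before the gfn->pfn
 * translation, then re-check it under mmu_lock and back out if an
 * invalidation ran in between.  The helpers are the real ones used above;
 * the body is trimmed to show only the ordering.
 */
static int example_map_with_retry(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	unsigned long mmu_seq = vcpu->kvm->mmu_notifier_seq;
	pfn_t pfn;

	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, gfn);
	if (is_error_pfn(pfn))
		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq)) {
		spin_unlock(&vcpu->kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return 0;	/* the guest will fault again and retry */
	}
	/* ... install the mapping here ... */
	spin_unlock(&vcpu->kvm->mmu_lock);
	return 0;
}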
2128 | static void mmu_free_roots(struct kvm_vcpu *vcpu) | 2130 | static void mmu_free_roots(struct kvm_vcpu *vcpu) |
2129 | { | 2131 | { |
2130 | int i; | 2132 | int i; |
2131 | struct kvm_mmu_page *sp; | 2133 | struct kvm_mmu_page *sp; |
2132 | LIST_HEAD(invalid_list); | 2134 | LIST_HEAD(invalid_list); |
2133 | 2135 | ||
2134 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2136 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2135 | return; | 2137 | return; |
2136 | spin_lock(&vcpu->kvm->mmu_lock); | 2138 | spin_lock(&vcpu->kvm->mmu_lock); |
2137 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2139 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
2138 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2140 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2139 | 2141 | ||
2140 | sp = page_header(root); | 2142 | sp = page_header(root); |
2141 | --sp->root_count; | 2143 | --sp->root_count; |
2142 | if (!sp->root_count && sp->role.invalid) { | 2144 | if (!sp->root_count && sp->role.invalid) { |
2143 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); | 2145 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
2144 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 2146 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2145 | } | 2147 | } |
2146 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2148 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
2147 | spin_unlock(&vcpu->kvm->mmu_lock); | 2149 | spin_unlock(&vcpu->kvm->mmu_lock); |
2148 | return; | 2150 | return; |
2149 | } | 2151 | } |
2150 | for (i = 0; i < 4; ++i) { | 2152 | for (i = 0; i < 4; ++i) { |
2151 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2153 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2152 | 2154 | ||
2153 | if (root) { | 2155 | if (root) { |
2154 | root &= PT64_BASE_ADDR_MASK; | 2156 | root &= PT64_BASE_ADDR_MASK; |
2155 | sp = page_header(root); | 2157 | sp = page_header(root); |
2156 | --sp->root_count; | 2158 | --sp->root_count; |
2157 | if (!sp->root_count && sp->role.invalid) | 2159 | if (!sp->root_count && sp->role.invalid) |
2158 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 2160 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2159 | &invalid_list); | 2161 | &invalid_list); |
2160 | } | 2162 | } |
2161 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2163 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2162 | } | 2164 | } |
2163 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 2165 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2164 | spin_unlock(&vcpu->kvm->mmu_lock); | 2166 | spin_unlock(&vcpu->kvm->mmu_lock); |
2165 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2167 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
2166 | } | 2168 | } |
2167 | 2169 | ||
2168 | static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) | 2170 | static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) |
2169 | { | 2171 | { |
2170 | int ret = 0; | 2172 | int ret = 0; |
2171 | 2173 | ||
2172 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { | 2174 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { |
2173 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 2175 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2174 | ret = 1; | 2176 | ret = 1; |
2175 | } | 2177 | } |
2176 | 2178 | ||
2177 | return ret; | 2179 | return ret; |
2178 | } | 2180 | } |
2179 | 2181 | ||
2180 | static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | 2182 | static int mmu_alloc_roots(struct kvm_vcpu *vcpu) |
2181 | { | 2183 | { |
2182 | int i; | 2184 | int i; |
2183 | gfn_t root_gfn; | 2185 | gfn_t root_gfn; |
2184 | struct kvm_mmu_page *sp; | 2186 | struct kvm_mmu_page *sp; |
2185 | int direct = 0; | 2187 | int direct = 0; |
2186 | u64 pdptr; | 2188 | u64 pdptr; |
2187 | 2189 | ||
2188 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; | 2190 | root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; |
2189 | 2191 | ||
2190 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2192 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
2191 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2193 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2192 | 2194 | ||
2193 | ASSERT(!VALID_PAGE(root)); | 2195 | ASSERT(!VALID_PAGE(root)); |
2194 | if (mmu_check_root(vcpu, root_gfn)) | 2196 | if (mmu_check_root(vcpu, root_gfn)) |
2195 | return 1; | 2197 | return 1; |
2196 | if (tdp_enabled) { | 2198 | if (tdp_enabled) { |
2197 | direct = 1; | 2199 | direct = 1; |
2198 | root_gfn = 0; | 2200 | root_gfn = 0; |
2199 | } | 2201 | } |
2200 | spin_lock(&vcpu->kvm->mmu_lock); | 2202 | spin_lock(&vcpu->kvm->mmu_lock); |
2201 | kvm_mmu_free_some_pages(vcpu); | 2203 | kvm_mmu_free_some_pages(vcpu); |
2202 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2204 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
2203 | PT64_ROOT_LEVEL, direct, | 2205 | PT64_ROOT_LEVEL, direct, |
2204 | ACC_ALL, NULL); | 2206 | ACC_ALL, NULL); |
2205 | root = __pa(sp->spt); | 2207 | root = __pa(sp->spt); |
2206 | ++sp->root_count; | 2208 | ++sp->root_count; |
2207 | spin_unlock(&vcpu->kvm->mmu_lock); | 2209 | spin_unlock(&vcpu->kvm->mmu_lock); |
2208 | vcpu->arch.mmu.root_hpa = root; | 2210 | vcpu->arch.mmu.root_hpa = root; |
2209 | return 0; | 2211 | return 0; |
2210 | } | 2212 | } |
2211 | direct = !is_paging(vcpu); | 2213 | direct = !is_paging(vcpu); |
2212 | for (i = 0; i < 4; ++i) { | 2214 | for (i = 0; i < 4; ++i) { |
2213 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2215 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2214 | 2216 | ||
2215 | ASSERT(!VALID_PAGE(root)); | 2217 | ASSERT(!VALID_PAGE(root)); |
2216 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { | 2218 | if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { |
2217 | pdptr = kvm_pdptr_read(vcpu, i); | 2219 | pdptr = kvm_pdptr_read(vcpu, i); |
2218 | if (!is_present_gpte(pdptr)) { | 2220 | if (!is_present_gpte(pdptr)) { |
2219 | vcpu->arch.mmu.pae_root[i] = 0; | 2221 | vcpu->arch.mmu.pae_root[i] = 0; |
2220 | continue; | 2222 | continue; |
2221 | } | 2223 | } |
2222 | root_gfn = pdptr >> PAGE_SHIFT; | 2224 | root_gfn = pdptr >> PAGE_SHIFT; |
2223 | } else if (vcpu->arch.mmu.root_level == 0) | 2225 | } else if (vcpu->arch.mmu.root_level == 0) |
2224 | root_gfn = 0; | 2226 | root_gfn = 0; |
2225 | if (mmu_check_root(vcpu, root_gfn)) | 2227 | if (mmu_check_root(vcpu, root_gfn)) |
2226 | return 1; | 2228 | return 1; |
2227 | if (tdp_enabled) { | 2229 | if (tdp_enabled) { |
2228 | direct = 1; | 2230 | direct = 1; |
2229 | root_gfn = i << 30; | 2231 | root_gfn = i << 30; |
2230 | } | 2232 | } |
2231 | spin_lock(&vcpu->kvm->mmu_lock); | 2233 | spin_lock(&vcpu->kvm->mmu_lock); |
2232 | kvm_mmu_free_some_pages(vcpu); | 2234 | kvm_mmu_free_some_pages(vcpu); |
2233 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2235 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2234 | PT32_ROOT_LEVEL, direct, | 2236 | PT32_ROOT_LEVEL, direct, |
2235 | ACC_ALL, NULL); | 2237 | ACC_ALL, NULL); |
2236 | root = __pa(sp->spt); | 2238 | root = __pa(sp->spt); |
2237 | ++sp->root_count; | 2239 | ++sp->root_count; |
2238 | spin_unlock(&vcpu->kvm->mmu_lock); | 2240 | spin_unlock(&vcpu->kvm->mmu_lock); |
2239 | 2241 | ||
2240 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2242 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
2241 | } | 2243 | } |
2242 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2244 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
2243 | return 0; | 2245 | return 0; |
2244 | } | 2246 | } |
2245 | 2247 | ||
2246 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) | 2248 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) |
2247 | { | 2249 | { |
2248 | int i; | 2250 | int i; |
2249 | struct kvm_mmu_page *sp; | 2251 | struct kvm_mmu_page *sp; |
2250 | 2252 | ||
2251 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2253 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2252 | return; | 2254 | return; |
2253 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2255 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
2254 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2256 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2255 | sp = page_header(root); | 2257 | sp = page_header(root); |
2256 | mmu_sync_children(vcpu, sp); | 2258 | mmu_sync_children(vcpu, sp); |
2257 | return; | 2259 | return; |
2258 | } | 2260 | } |
2259 | for (i = 0; i < 4; ++i) { | 2261 | for (i = 0; i < 4; ++i) { |
2260 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2262 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2261 | 2263 | ||
2262 | if (root && VALID_PAGE(root)) { | 2264 | if (root && VALID_PAGE(root)) { |
2263 | root &= PT64_BASE_ADDR_MASK; | 2265 | root &= PT64_BASE_ADDR_MASK; |
2264 | sp = page_header(root); | 2266 | sp = page_header(root); |
2265 | mmu_sync_children(vcpu, sp); | 2267 | mmu_sync_children(vcpu, sp); |
2266 | } | 2268 | } |
2267 | } | 2269 | } |
2268 | } | 2270 | } |
2269 | 2271 | ||
2270 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2272 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
2271 | { | 2273 | { |
2272 | spin_lock(&vcpu->kvm->mmu_lock); | 2274 | spin_lock(&vcpu->kvm->mmu_lock); |
2273 | mmu_sync_roots(vcpu); | 2275 | mmu_sync_roots(vcpu); |
2274 | spin_unlock(&vcpu->kvm->mmu_lock); | 2276 | spin_unlock(&vcpu->kvm->mmu_lock); |
2275 | } | 2277 | } |
2276 | 2278 | ||
2277 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, | 2279 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
2278 | u32 access, u32 *error) | 2280 | u32 access, u32 *error) |
2279 | { | 2281 | { |
2280 | if (error) | 2282 | if (error) |
2281 | *error = 0; | 2283 | *error = 0; |
2282 | return vaddr; | 2284 | return vaddr; |
2283 | } | 2285 | } |
2284 | 2286 | ||
2285 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | 2287 | static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, |
2286 | u32 error_code) | 2288 | u32 error_code) |
2287 | { | 2289 | { |
2288 | gfn_t gfn; | 2290 | gfn_t gfn; |
2289 | int r; | 2291 | int r; |
2290 | 2292 | ||
2291 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); | 2293 | pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); |
2292 | r = mmu_topup_memory_caches(vcpu); | 2294 | r = mmu_topup_memory_caches(vcpu); |
2293 | if (r) | 2295 | if (r) |
2294 | return r; | 2296 | return r; |
2295 | 2297 | ||
2296 | ASSERT(vcpu); | 2298 | ASSERT(vcpu); |
2297 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2299 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
2298 | 2300 | ||
2299 | gfn = gva >> PAGE_SHIFT; | 2301 | gfn = gva >> PAGE_SHIFT; |
2300 | 2302 | ||
2301 | return nonpaging_map(vcpu, gva & PAGE_MASK, | 2303 | return nonpaging_map(vcpu, gva & PAGE_MASK, |
2302 | error_code & PFERR_WRITE_MASK, gfn); | 2304 | error_code & PFERR_WRITE_MASK, gfn); |
2303 | } | 2305 | } |
2304 | 2306 | ||
2305 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | 2307 | static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, |
2306 | u32 error_code) | 2308 | u32 error_code) |
2307 | { | 2309 | { |
2308 | pfn_t pfn; | 2310 | pfn_t pfn; |
2309 | int r; | 2311 | int r; |
2310 | int level; | 2312 | int level; |
2311 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2313 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2312 | unsigned long mmu_seq; | 2314 | unsigned long mmu_seq; |
2313 | 2315 | ||
2314 | ASSERT(vcpu); | 2316 | ASSERT(vcpu); |
2315 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2317 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
2316 | 2318 | ||
2317 | r = mmu_topup_memory_caches(vcpu); | 2319 | r = mmu_topup_memory_caches(vcpu); |
2318 | if (r) | 2320 | if (r) |
2319 | return r; | 2321 | return r; |
2320 | 2322 | ||
2321 | level = mapping_level(vcpu, gfn); | 2323 | level = mapping_level(vcpu, gfn); |
2322 | 2324 | ||
2323 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2325 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2324 | 2326 | ||
2325 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2327 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2326 | smp_rmb(); | 2328 | smp_rmb(); |
2327 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2329 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
2328 | if (is_error_pfn(pfn)) | 2330 | if (is_error_pfn(pfn)) |
2329 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); | 2331 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
2330 | spin_lock(&vcpu->kvm->mmu_lock); | 2332 | spin_lock(&vcpu->kvm->mmu_lock); |
2331 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2333 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2332 | goto out_unlock; | 2334 | goto out_unlock; |
2333 | kvm_mmu_free_some_pages(vcpu); | 2335 | kvm_mmu_free_some_pages(vcpu); |
2334 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 2336 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
2335 | level, gfn, pfn); | 2337 | level, gfn, pfn); |
2336 | spin_unlock(&vcpu->kvm->mmu_lock); | 2338 | spin_unlock(&vcpu->kvm->mmu_lock); |
2337 | 2339 | ||
2338 | return r; | 2340 | return r; |
2339 | 2341 | ||
2340 | out_unlock: | 2342 | out_unlock: |
2341 | spin_unlock(&vcpu->kvm->mmu_lock); | 2343 | spin_unlock(&vcpu->kvm->mmu_lock); |
2342 | kvm_release_pfn_clean(pfn); | 2344 | kvm_release_pfn_clean(pfn); |
2343 | return 0; | 2345 | return 0; |
2344 | } | 2346 | } |
2345 | 2347 | ||
2346 | static void nonpaging_free(struct kvm_vcpu *vcpu) | 2348 | static void nonpaging_free(struct kvm_vcpu *vcpu) |
2347 | { | 2349 | { |
2348 | mmu_free_roots(vcpu); | 2350 | mmu_free_roots(vcpu); |
2349 | } | 2351 | } |
2350 | 2352 | ||
2351 | static int nonpaging_init_context(struct kvm_vcpu *vcpu) | 2353 | static int nonpaging_init_context(struct kvm_vcpu *vcpu) |
2352 | { | 2354 | { |
2353 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2355 | struct kvm_mmu *context = &vcpu->arch.mmu; |
2354 | 2356 | ||
2355 | context->new_cr3 = nonpaging_new_cr3; | 2357 | context->new_cr3 = nonpaging_new_cr3; |
2356 | context->page_fault = nonpaging_page_fault; | 2358 | context->page_fault = nonpaging_page_fault; |
2357 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 2359 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
2358 | context->free = nonpaging_free; | 2360 | context->free = nonpaging_free; |
2359 | context->prefetch_page = nonpaging_prefetch_page; | 2361 | context->prefetch_page = nonpaging_prefetch_page; |
2360 | context->sync_page = nonpaging_sync_page; | 2362 | context->sync_page = nonpaging_sync_page; |
2361 | context->invlpg = nonpaging_invlpg; | 2363 | context->invlpg = nonpaging_invlpg; |
2362 | context->root_level = 0; | 2364 | context->root_level = 0; |
2363 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2365 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2364 | context->root_hpa = INVALID_PAGE; | 2366 | context->root_hpa = INVALID_PAGE; |
2365 | return 0; | 2367 | return 0; |
2366 | } | 2368 | } |
2367 | 2369 | ||
2368 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 2370 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
2369 | { | 2371 | { |
2370 | ++vcpu->stat.tlb_flush; | 2372 | ++vcpu->stat.tlb_flush; |
2371 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | 2373 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
2372 | } | 2374 | } |
2373 | 2375 | ||
2374 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 2376 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
2375 | { | 2377 | { |
2376 | pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); | 2378 | pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3); |
2377 | mmu_free_roots(vcpu); | 2379 | mmu_free_roots(vcpu); |
2378 | } | 2380 | } |
2379 | 2381 | ||
2380 | static void inject_page_fault(struct kvm_vcpu *vcpu, | 2382 | static void inject_page_fault(struct kvm_vcpu *vcpu, |
2381 | u64 addr, | 2383 | u64 addr, |
2382 | u32 err_code) | 2384 | u32 err_code) |
2383 | { | 2385 | { |
2384 | kvm_inject_page_fault(vcpu, addr, err_code); | 2386 | kvm_inject_page_fault(vcpu, addr, err_code); |
2385 | } | 2387 | } |
2386 | 2388 | ||
2387 | static void paging_free(struct kvm_vcpu *vcpu) | 2389 | static void paging_free(struct kvm_vcpu *vcpu) |
2388 | { | 2390 | { |
2389 | nonpaging_free(vcpu); | 2391 | nonpaging_free(vcpu); |
2390 | } | 2392 | } |
2391 | 2393 | ||
2392 | static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) | 2394 | static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) |
2393 | { | 2395 | { |
2394 | int bit7; | 2396 | int bit7; |
2395 | 2397 | ||
2396 | bit7 = (gpte >> 7) & 1; | 2398 | bit7 = (gpte >> 7) & 1; |
2397 | return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; | 2399 | return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; |
2398 | } | 2400 | } |
2399 | 2401 | ||
2400 | #define PTTYPE 64 | 2402 | #define PTTYPE 64 |
2401 | #include "paging_tmpl.h" | 2403 | #include "paging_tmpl.h" |
2402 | #undef PTTYPE | 2404 | #undef PTTYPE |
2403 | 2405 | ||
2404 | #define PTTYPE 32 | 2406 | #define PTTYPE 32 |
2405 | #include "paging_tmpl.h" | 2407 | #include "paging_tmpl.h" |
2406 | #undef PTTYPE | 2408 | #undef PTTYPE |
2407 | 2409 | ||
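/*
 * Sketch of the template trick above: paging_tmpl.h pastes the PTTYPE
 * value into its function names, so including it once with PTTYPE 64 and
 * once with PTTYPE 32 generates the paging64_* and paging32_* walkers from
 * a single source file.  Simplified illustration of the name pasting (the
 * real macros live in paging_tmpl.h):
 */
#define EXAMPLE_PASTE(prefix, name)	prefix##name
#define EXAMPLE_FNAME(name)		EXAMPLE_PASTE(paging64_, name)	/* with PTTYPE == 64 */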
2408 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | 2410 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) |
2409 | { | 2411 | { |
2410 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2412 | struct kvm_mmu *context = &vcpu->arch.mmu; |
2411 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 2413 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
2412 | u64 exb_bit_rsvd = 0; | 2414 | u64 exb_bit_rsvd = 0; |
2413 | 2415 | ||
2414 | if (!is_nx(vcpu)) | 2416 | if (!is_nx(vcpu)) |
2415 | exb_bit_rsvd = rsvd_bits(63, 63); | 2417 | exb_bit_rsvd = rsvd_bits(63, 63); |
2416 | switch (level) { | 2418 | switch (level) { |
2417 | case PT32_ROOT_LEVEL: | 2419 | case PT32_ROOT_LEVEL: |
2418 | /* no rsvd bits for 2 level 4K page table entries */ | 2420 | /* no rsvd bits for 2 level 4K page table entries */ |
2419 | context->rsvd_bits_mask[0][1] = 0; | 2421 | context->rsvd_bits_mask[0][1] = 0; |
2420 | context->rsvd_bits_mask[0][0] = 0; | 2422 | context->rsvd_bits_mask[0][0] = 0; |
2421 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | 2423 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2422 | 2424 | ||
2423 | if (!is_pse(vcpu)) { | 2425 | if (!is_pse(vcpu)) { |
2424 | context->rsvd_bits_mask[1][1] = 0; | 2426 | context->rsvd_bits_mask[1][1] = 0; |
2425 | break; | 2427 | break; |
2426 | } | 2428 | } |
2427 | 2429 | ||
2428 | if (is_cpuid_PSE36()) | 2430 | if (is_cpuid_PSE36()) |
2429 | /* 36bits PSE 4MB page */ | 2431 | /* 36bits PSE 4MB page */ |
2430 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2432 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
2431 | else | 2433 | else |
2432 | /* 32 bits PSE 4MB page */ | 2434 | /* 32 bits PSE 4MB page */ |
2433 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2435 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2434 | break; | 2436 | break; |
2435 | case PT32E_ROOT_LEVEL: | 2437 | case PT32E_ROOT_LEVEL: |
2436 | context->rsvd_bits_mask[0][2] = | 2438 | context->rsvd_bits_mask[0][2] = |
2437 | rsvd_bits(maxphyaddr, 63) | | 2439 | rsvd_bits(maxphyaddr, 63) | |
2438 | rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */ | 2440 | rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */ |
2439 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | 2441 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | |
2440 | rsvd_bits(maxphyaddr, 62); /* PDE */ | 2442 | rsvd_bits(maxphyaddr, 62); /* PDE */ |
2441 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | 2443 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | |
2442 | rsvd_bits(maxphyaddr, 62); /* PTE */ | 2444 | rsvd_bits(maxphyaddr, 62); /* PTE */ |
2443 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2445 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2444 | rsvd_bits(maxphyaddr, 62) | | 2446 | rsvd_bits(maxphyaddr, 62) | |
2445 | rsvd_bits(13, 20); /* large page */ | 2447 | rsvd_bits(13, 20); /* large page */ |
2446 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | 2448 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2447 | break; | 2449 | break; |
2448 | case PT64_ROOT_LEVEL: | 2450 | case PT64_ROOT_LEVEL: |
2449 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2451 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
2450 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); | 2452 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); |
2451 | context->rsvd_bits_mask[0][2] = exb_bit_rsvd | | 2453 | context->rsvd_bits_mask[0][2] = exb_bit_rsvd | |
2452 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); | 2454 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); |
2453 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | 2455 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | |
2454 | rsvd_bits(maxphyaddr, 51); | 2456 | rsvd_bits(maxphyaddr, 51); |
2455 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | 2457 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | |
2456 | rsvd_bits(maxphyaddr, 51); | 2458 | rsvd_bits(maxphyaddr, 51); |
2457 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; | 2459 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; |
2458 | context->rsvd_bits_mask[1][2] = exb_bit_rsvd | | 2460 | context->rsvd_bits_mask[1][2] = exb_bit_rsvd | |
2459 | rsvd_bits(maxphyaddr, 51) | | 2461 | rsvd_bits(maxphyaddr, 51) | |
2460 | rsvd_bits(13, 29); | 2462 | rsvd_bits(13, 29); |
2461 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2463 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2462 | rsvd_bits(maxphyaddr, 51) | | 2464 | rsvd_bits(maxphyaddr, 51) | |
2463 | rsvd_bits(13, 20); /* large page */ | 2465 | rsvd_bits(13, 20); /* large page */ |
2464 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | 2466 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2465 | break; | 2467 | break; |
2466 | } | 2468 | } |
2467 | } | 2469 | } |
2468 | 2470 | ||
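/*
 * For reference, rsvd_bits(s, e) used above produces a mask with bits
 * s..e (inclusive) set.  A minimal sketch of such a helper:
 */
static inline u64 example_rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}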
2469 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | 2471 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) |
2470 | { | 2472 | { |
2471 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2473 | struct kvm_mmu *context = &vcpu->arch.mmu; |
2472 | 2474 | ||
2473 | ASSERT(is_pae(vcpu)); | 2475 | ASSERT(is_pae(vcpu)); |
2474 | context->new_cr3 = paging_new_cr3; | 2476 | context->new_cr3 = paging_new_cr3; |
2475 | context->page_fault = paging64_page_fault; | 2477 | context->page_fault = paging64_page_fault; |
2476 | context->gva_to_gpa = paging64_gva_to_gpa; | 2478 | context->gva_to_gpa = paging64_gva_to_gpa; |
2477 | context->prefetch_page = paging64_prefetch_page; | 2479 | context->prefetch_page = paging64_prefetch_page; |
2478 | context->sync_page = paging64_sync_page; | 2480 | context->sync_page = paging64_sync_page; |
2479 | context->invlpg = paging64_invlpg; | 2481 | context->invlpg = paging64_invlpg; |
2480 | context->free = paging_free; | 2482 | context->free = paging_free; |
2481 | context->root_level = level; | 2483 | context->root_level = level; |
2482 | context->shadow_root_level = level; | 2484 | context->shadow_root_level = level; |
2483 | context->root_hpa = INVALID_PAGE; | 2485 | context->root_hpa = INVALID_PAGE; |
2484 | return 0; | 2486 | return 0; |
2485 | } | 2487 | } |
2486 | 2488 | ||
2487 | static int paging64_init_context(struct kvm_vcpu *vcpu) | 2489 | static int paging64_init_context(struct kvm_vcpu *vcpu) |
2488 | { | 2490 | { |
2489 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | 2491 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); |
2490 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); | 2492 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); |
2491 | } | 2493 | } |
2492 | 2494 | ||
2493 | static int paging32_init_context(struct kvm_vcpu *vcpu) | 2495 | static int paging32_init_context(struct kvm_vcpu *vcpu) |
2494 | { | 2496 | { |
2495 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2497 | struct kvm_mmu *context = &vcpu->arch.mmu; |
2496 | 2498 | ||
2497 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | 2499 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); |
2498 | context->new_cr3 = paging_new_cr3; | 2500 | context->new_cr3 = paging_new_cr3; |
2499 | context->page_fault = paging32_page_fault; | 2501 | context->page_fault = paging32_page_fault; |
2500 | context->gva_to_gpa = paging32_gva_to_gpa; | 2502 | context->gva_to_gpa = paging32_gva_to_gpa; |
2501 | context->free = paging_free; | 2503 | context->free = paging_free; |
2502 | context->prefetch_page = paging32_prefetch_page; | 2504 | context->prefetch_page = paging32_prefetch_page; |
2503 | context->sync_page = paging32_sync_page; | 2505 | context->sync_page = paging32_sync_page; |
2504 | context->invlpg = paging32_invlpg; | 2506 | context->invlpg = paging32_invlpg; |
2505 | context->root_level = PT32_ROOT_LEVEL; | 2507 | context->root_level = PT32_ROOT_LEVEL; |
2506 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2508 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2507 | context->root_hpa = INVALID_PAGE; | 2509 | context->root_hpa = INVALID_PAGE; |
2508 | return 0; | 2510 | return 0; |
2509 | } | 2511 | } |
2510 | 2512 | ||
2511 | static int paging32E_init_context(struct kvm_vcpu *vcpu) | 2513 | static int paging32E_init_context(struct kvm_vcpu *vcpu) |
2512 | { | 2514 | { |
2513 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | 2515 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); |
2514 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); | 2516 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); |
2515 | } | 2517 | } |
2516 | 2518 | ||
2517 | static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | 2519 | static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) |
2518 | { | 2520 | { |
2519 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2521 | struct kvm_mmu *context = &vcpu->arch.mmu; |
2520 | 2522 | ||
2521 | context->new_cr3 = nonpaging_new_cr3; | 2523 | context->new_cr3 = nonpaging_new_cr3; |
2522 | context->page_fault = tdp_page_fault; | 2524 | context->page_fault = tdp_page_fault; |
2523 | context->free = nonpaging_free; | 2525 | context->free = nonpaging_free; |
2524 | context->prefetch_page = nonpaging_prefetch_page; | 2526 | context->prefetch_page = nonpaging_prefetch_page; |
2525 | context->sync_page = nonpaging_sync_page; | 2527 | context->sync_page = nonpaging_sync_page; |
2526 | context->invlpg = nonpaging_invlpg; | 2528 | context->invlpg = nonpaging_invlpg; |
2527 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 2529 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
2528 | context->root_hpa = INVALID_PAGE; | 2530 | context->root_hpa = INVALID_PAGE; |
2529 | 2531 | ||
2530 | if (!is_paging(vcpu)) { | 2532 | if (!is_paging(vcpu)) { |
2531 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 2533 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
2532 | context->root_level = 0; | 2534 | context->root_level = 0; |
2533 | } else if (is_long_mode(vcpu)) { | 2535 | } else if (is_long_mode(vcpu)) { |
2534 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | 2536 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); |
2535 | context->gva_to_gpa = paging64_gva_to_gpa; | 2537 | context->gva_to_gpa = paging64_gva_to_gpa; |
2536 | context->root_level = PT64_ROOT_LEVEL; | 2538 | context->root_level = PT64_ROOT_LEVEL; |
2537 | } else if (is_pae(vcpu)) { | 2539 | } else if (is_pae(vcpu)) { |
2538 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | 2540 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); |
2539 | context->gva_to_gpa = paging64_gva_to_gpa; | 2541 | context->gva_to_gpa = paging64_gva_to_gpa; |
2540 | context->root_level = PT32E_ROOT_LEVEL; | 2542 | context->root_level = PT32E_ROOT_LEVEL; |
2541 | } else { | 2543 | } else { |
2542 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | 2544 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); |
2543 | context->gva_to_gpa = paging32_gva_to_gpa; | 2545 | context->gva_to_gpa = paging32_gva_to_gpa; |
2544 | context->root_level = PT32_ROOT_LEVEL; | 2546 | context->root_level = PT32_ROOT_LEVEL; |
2545 | } | 2547 | } |
2546 | 2548 | ||
2547 | return 0; | 2549 | return 0; |
2548 | } | 2550 | } |
2549 | 2551 | ||
2550 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | 2552 | static int init_kvm_softmmu(struct kvm_vcpu *vcpu) |
2551 | { | 2553 | { |
2552 | int r; | 2554 | int r; |
2553 | 2555 | ||
2554 | ASSERT(vcpu); | 2556 | ASSERT(vcpu); |
2555 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2557 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
2556 | 2558 | ||
2557 | if (!is_paging(vcpu)) | 2559 | if (!is_paging(vcpu)) |
2558 | r = nonpaging_init_context(vcpu); | 2560 | r = nonpaging_init_context(vcpu); |
2559 | else if (is_long_mode(vcpu)) | 2561 | else if (is_long_mode(vcpu)) |
2560 | r = paging64_init_context(vcpu); | 2562 | r = paging64_init_context(vcpu); |
2561 | else if (is_pae(vcpu)) | 2563 | else if (is_pae(vcpu)) |
2562 | r = paging32E_init_context(vcpu); | 2564 | r = paging32E_init_context(vcpu); |
2563 | else | 2565 | else |
2564 | r = paging32_init_context(vcpu); | 2566 | r = paging32_init_context(vcpu); |
2565 | 2567 | ||
2566 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 2568 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
2567 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 2569 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); |
2568 | 2570 | ||
2569 | return r; | 2571 | return r; |
2570 | } | 2572 | } |
2571 | 2573 | ||
2572 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 2574 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) |
2573 | { | 2575 | { |
2574 | vcpu->arch.update_pte.pfn = bad_pfn; | 2576 | vcpu->arch.update_pte.pfn = bad_pfn; |
2575 | 2577 | ||
2576 | if (tdp_enabled) | 2578 | if (tdp_enabled) |
2577 | return init_kvm_tdp_mmu(vcpu); | 2579 | return init_kvm_tdp_mmu(vcpu); |
2578 | else | 2580 | else |
2579 | return init_kvm_softmmu(vcpu); | 2581 | return init_kvm_softmmu(vcpu); |
2580 | } | 2582 | } |
2581 | 2583 | ||
2582 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | 2584 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) |
2583 | { | 2585 | { |
2584 | ASSERT(vcpu); | 2586 | ASSERT(vcpu); |
2585 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2587 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2586 | /* mmu.free() should set root_hpa = INVALID_PAGE */ | 2588 | /* mmu.free() should set root_hpa = INVALID_PAGE */ |
2587 | vcpu->arch.mmu.free(vcpu); | 2589 | vcpu->arch.mmu.free(vcpu); |
2588 | } | 2590 | } |
2589 | 2591 | ||
2590 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | 2592 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) |
2591 | { | 2593 | { |
2592 | destroy_kvm_mmu(vcpu); | 2594 | destroy_kvm_mmu(vcpu); |
2593 | return init_kvm_mmu(vcpu); | 2595 | return init_kvm_mmu(vcpu); |
2594 | } | 2596 | } |
2595 | EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); | 2597 | EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); |
2596 | 2598 | ||
2597 | int kvm_mmu_load(struct kvm_vcpu *vcpu) | 2599 | int kvm_mmu_load(struct kvm_vcpu *vcpu) |
2598 | { | 2600 | { |
2599 | int r; | 2601 | int r; |
2600 | 2602 | ||
2601 | r = mmu_topup_memory_caches(vcpu); | 2603 | r = mmu_topup_memory_caches(vcpu); |
2602 | if (r) | 2604 | if (r) |
2603 | goto out; | 2605 | goto out; |
2604 | r = mmu_alloc_roots(vcpu); | 2606 | r = mmu_alloc_roots(vcpu); |
2605 | spin_lock(&vcpu->kvm->mmu_lock); | 2607 | spin_lock(&vcpu->kvm->mmu_lock); |
2606 | mmu_sync_roots(vcpu); | 2608 | mmu_sync_roots(vcpu); |
2607 | spin_unlock(&vcpu->kvm->mmu_lock); | 2609 | spin_unlock(&vcpu->kvm->mmu_lock); |
2608 | if (r) | 2610 | if (r) |
2609 | goto out; | 2611 | goto out; |
2610 | /* set_cr3() should ensure TLB has been flushed */ | 2612 | /* set_cr3() should ensure TLB has been flushed */ |
2611 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 2613 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
2612 | out: | 2614 | out: |
2613 | return r; | 2615 | return r; |
2614 | } | 2616 | } |
2615 | EXPORT_SYMBOL_GPL(kvm_mmu_load); | 2617 | EXPORT_SYMBOL_GPL(kvm_mmu_load); |
2616 | 2618 | ||
2617 | void kvm_mmu_unload(struct kvm_vcpu *vcpu) | 2619 | void kvm_mmu_unload(struct kvm_vcpu *vcpu) |
2618 | { | 2620 | { |
2619 | mmu_free_roots(vcpu); | 2621 | mmu_free_roots(vcpu); |
2620 | } | 2622 | } |
2621 | 2623 | ||
2622 | static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | 2624 | static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, |
2623 | struct kvm_mmu_page *sp, | 2625 | struct kvm_mmu_page *sp, |
2624 | u64 *spte) | 2626 | u64 *spte) |
2625 | { | 2627 | { |
2626 | u64 pte; | 2628 | u64 pte; |
2627 | struct kvm_mmu_page *child; | 2629 | struct kvm_mmu_page *child; |
2628 | 2630 | ||
2629 | pte = *spte; | 2631 | pte = *spte; |
2630 | if (is_shadow_present_pte(pte)) { | 2632 | if (is_shadow_present_pte(pte)) { |
2631 | if (is_last_spte(pte, sp->role.level)) | 2633 | if (is_last_spte(pte, sp->role.level)) |
2632 | drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); | 2634 | drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); |
2633 | else { | 2635 | else { |
2634 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 2636 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
2635 | mmu_page_remove_parent_pte(child, spte); | 2637 | mmu_page_remove_parent_pte(child, spte); |
2636 | } | 2638 | } |
2637 | } | 2639 | } |
2638 | __set_spte(spte, shadow_trap_nonpresent_pte); | 2640 | __set_spte(spte, shadow_trap_nonpresent_pte); |
2639 | if (is_large_pte(pte)) | 2641 | if (is_large_pte(pte)) |
2640 | --vcpu->kvm->stat.lpages; | 2642 | --vcpu->kvm->stat.lpages; |
2641 | } | 2643 | } |
2642 | 2644 | ||
2643 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 2645 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
2644 | struct kvm_mmu_page *sp, | 2646 | struct kvm_mmu_page *sp, |
2645 | u64 *spte, | 2647 | u64 *spte, |
2646 | const void *new) | 2648 | const void *new) |
2647 | { | 2649 | { |
2648 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 2650 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
2649 | ++vcpu->kvm->stat.mmu_pde_zapped; | 2651 | ++vcpu->kvm->stat.mmu_pde_zapped; |
2650 | return; | 2652 | return; |
2651 | } | 2653 | } |
2652 | 2654 | ||
2653 | ++vcpu->kvm->stat.mmu_pte_updated; | 2655 | ++vcpu->kvm->stat.mmu_pte_updated; |
2654 | if (!sp->role.cr4_pae) | 2656 | if (!sp->role.cr4_pae) |
2655 | paging32_update_pte(vcpu, sp, spte, new); | 2657 | paging32_update_pte(vcpu, sp, spte, new); |
2656 | else | 2658 | else |
2657 | paging64_update_pte(vcpu, sp, spte, new); | 2659 | paging64_update_pte(vcpu, sp, spte, new); |
2658 | } | 2660 | } |
2659 | 2661 | ||
2660 | static bool need_remote_flush(u64 old, u64 new) | 2662 | static bool need_remote_flush(u64 old, u64 new) |
2661 | { | 2663 | { |
2662 | if (!is_shadow_present_pte(old)) | 2664 | if (!is_shadow_present_pte(old)) |
2663 | return false; | 2665 | return false; |
2664 | if (!is_shadow_present_pte(new)) | 2666 | if (!is_shadow_present_pte(new)) |
2665 | return true; | 2667 | return true; |
2666 | if ((old ^ new) & PT64_BASE_ADDR_MASK) | 2668 | if ((old ^ new) & PT64_BASE_ADDR_MASK) |
2667 | return true; | 2669 | return true; |
2668 | old ^= PT64_NX_MASK; | 2670 | old ^= PT64_NX_MASK; |
2669 | new ^= PT64_NX_MASK; | 2671 | new ^= PT64_NX_MASK; |
2670 | return (old & ~new & PT64_PERM_MASK) != 0; | 2672 | return (old & ~new & PT64_PERM_MASK) != 0; |
2671 | } | 2673 | } |
2672 | 2674 | ||
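/*
 * Usage sketch for need_remote_flush(): a remote TLB flush is only
 * required when an existing translation loses something (the target
 * address changes or a permission bit is dropped).  Illustrative caller,
 * mirroring the pte-write path below:
 */
static void example_update_spte(struct kvm_vcpu *vcpu, u64 *spte, u64 new)
{
	u64 old = *spte;

	__set_spte(spte, new);
	if (need_remote_flush(old, new))
		kvm_flush_remote_tlbs(vcpu->kvm);
}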
2673 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, | 2675 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, |
2674 | bool remote_flush, bool local_flush) | 2676 | bool remote_flush, bool local_flush) |
2675 | { | 2677 | { |
2676 | if (zap_page) | 2678 | if (zap_page) |
2677 | return; | 2679 | return; |
2678 | 2680 | ||
2679 | if (remote_flush) | 2681 | if (remote_flush) |
2680 | kvm_flush_remote_tlbs(vcpu->kvm); | 2682 | kvm_flush_remote_tlbs(vcpu->kvm); |
2681 | else if (local_flush) | 2683 | else if (local_flush) |
2682 | kvm_mmu_flush_tlb(vcpu); | 2684 | kvm_mmu_flush_tlb(vcpu); |
2683 | } | 2685 | } |
2684 | 2686 | ||
2685 | static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | 2687 | static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) |
2686 | { | 2688 | { |
2687 | u64 *spte = vcpu->arch.last_pte_updated; | 2689 | u64 *spte = vcpu->arch.last_pte_updated; |
2688 | 2690 | ||
2689 | return !!(spte && (*spte & shadow_accessed_mask)); | 2691 | return !!(spte && (*spte & shadow_accessed_mask)); |
2690 | } | 2692 | } |
2691 | 2693 | ||
2692 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2694 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2693 | u64 gpte) | 2695 | u64 gpte) |
2694 | { | 2696 | { |
2695 | gfn_t gfn; | 2697 | gfn_t gfn; |
2696 | pfn_t pfn; | 2698 | pfn_t pfn; |
2697 | 2699 | ||
2698 | if (!is_present_gpte(gpte)) | 2700 | if (!is_present_gpte(gpte)) |
2699 | return; | 2701 | return; |
2700 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2702 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
2701 | 2703 | ||
2702 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2704 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2703 | smp_rmb(); | 2705 | smp_rmb(); |
2704 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2706 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
2705 | 2707 | ||
2706 | if (is_error_pfn(pfn)) { | 2708 | if (is_error_pfn(pfn)) { |
2707 | kvm_release_pfn_clean(pfn); | 2709 | kvm_release_pfn_clean(pfn); |
2708 | return; | 2710 | return; |
2709 | } | 2711 | } |
2710 | vcpu->arch.update_pte.gfn = gfn; | 2712 | vcpu->arch.update_pte.gfn = gfn; |
2711 | vcpu->arch.update_pte.pfn = pfn; | 2713 | vcpu->arch.update_pte.pfn = pfn; |
2712 | } | 2714 | } |
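mmu_guess_page_from_pte_write() above speculatively resolves the written gpte's frame to a host pfn before mmu_lock is taken; the mmu_notifier_seq snapshot plus smp_rmb() lets the code that later consumes vcpu->arch.update_pte, under mmu_lock, detect that an invalidation raced with the lock-less gfn_to_pfn() and discard the cached pfn. Below is a simplified user-space sketch of that snapshot-and-recheck pattern; every name (fake_kvm, translate_slowly, guess_pfn) is a hypothetical stand-in, and only the ordering of the steps mirrors the kernel code.

/* Minimal user-space sketch of the "snapshot, translate, recheck"
 * pattern around the lock-less gfn_to_pfn() call above.  Every name
 * here is a hypothetical stand-in; only the ordering of the steps
 * mirrors the kernel code.
 */
#include <stdio.h>

struct fake_kvm {
        unsigned long mmu_notifier_seq;    /* bumped by every invalidation */
};

/* Stand-in for gfn_to_pfn(): slow, may sleep, runs without mmu_lock. */
static unsigned long translate_slowly(struct fake_kvm *kvm, unsigned long gfn)
{
        (void)kvm;
        return gfn + 0x100;                /* pretend host frame number    */
}

static unsigned long guess_pfn(struct fake_kvm *kvm, unsigned long gfn)
{
        unsigned long seq, pfn;

        seq = kvm->mmu_notifier_seq;            /* 1. snapshot first          */
        __asm__ __volatile__("" ::: "memory");  /* 2. stand-in for smp_rmb()  */
        pfn = translate_slowly(kvm, gfn);       /* 3. translate, no lock held */

        /* 4. In the kernel this recheck happens later, under mmu_lock, when
         * the cached pfn is about to be used: a changed sequence number
         * means an invalidation raced with step 3, so the guess is dropped.
         */
        if (kvm->mmu_notifier_seq != seq)
                return (unsigned long)-1;       /* treat as "no valid pfn"    */
        return pfn;
}

int main(void)
{
        struct fake_kvm kvm = { .mmu_notifier_seq = 42 };

        printf("guessed pfn = %#lx\n", guess_pfn(&kvm, 0x1234));
        return 0;
}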
2713 | 2715 | ||
2714 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 2716 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) |
2715 | { | 2717 | { |
2716 | u64 *spte = vcpu->arch.last_pte_updated; | 2718 | u64 *spte = vcpu->arch.last_pte_updated; |
2717 | 2719 | ||
2718 | if (spte | 2720 | if (spte |
2719 | && vcpu->arch.last_pte_gfn == gfn | 2721 | && vcpu->arch.last_pte_gfn == gfn |
2720 | && shadow_accessed_mask | 2722 | && shadow_accessed_mask |
2721 | && !(*spte & shadow_accessed_mask) | 2723 | && !(*spte & shadow_accessed_mask) |
2722 | && is_shadow_present_pte(*spte)) | 2724 | && is_shadow_present_pte(*spte)) |
2723 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 2725 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); |
2724 | } | 2726 | } |
2725 | 2727 | ||
2726 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2728 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2727 | const u8 *new, int bytes, | 2729 | const u8 *new, int bytes, |
2728 | bool guest_initiated) | 2730 | bool guest_initiated) |
2729 | { | 2731 | { |
2730 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2732 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2731 | struct kvm_mmu_page *sp; | 2733 | struct kvm_mmu_page *sp; |
2732 | struct hlist_node *node; | 2734 | struct hlist_node *node; |
2733 | LIST_HEAD(invalid_list); | 2735 | LIST_HEAD(invalid_list); |
2734 | u64 entry, gentry; | 2736 | u64 entry, gentry; |
2735 | u64 *spte; | 2737 | u64 *spte; |
2736 | unsigned offset = offset_in_page(gpa); | 2738 | unsigned offset = offset_in_page(gpa); |
2737 | unsigned pte_size; | 2739 | unsigned pte_size; |
2738 | unsigned page_offset; | 2740 | unsigned page_offset; |
2739 | unsigned misaligned; | 2741 | unsigned misaligned; |
2740 | unsigned quadrant; | 2742 | unsigned quadrant; |
2741 | int level; | 2743 | int level; |
2742 | int flooded = 0; | 2744 | int flooded = 0; |
2743 | int npte; | 2745 | int npte; |
2744 | int r; | 2746 | int r; |
2745 | int invlpg_counter; | 2747 | int invlpg_counter; |
2746 | bool remote_flush, local_flush, zap_page; | 2748 | bool remote_flush, local_flush, zap_page; |
2747 | 2749 | ||
2748 | zap_page = remote_flush = local_flush = false; | 2750 | zap_page = remote_flush = local_flush = false; |
2749 | 2751 | ||
2750 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2752 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2751 | 2753 | ||
2752 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | 2754 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); |
2753 | 2755 | ||
2754 | /* | 2756 | /* |
2755 | * Assume that the pte write is on a page table of the same type | 2757 | * Assume that the pte write is on a page table of the same type |
2756 | * as the current vcpu paging mode. This is nearly always true | 2758 | * as the current vcpu paging mode. This is nearly always true |
2757 | * (might be false while changing modes). Note it is verified later | 2759 | * (might be false while changing modes). Note it is verified later |
2758 | * by update_pte(). | 2760 | * by update_pte(). |
2759 | */ | 2761 | */ |
2760 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 2762 | if ((is_pae(vcpu) && bytes == 4) || !new) { |
2761 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 2763 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
2762 | if (is_pae(vcpu)) { | 2764 | if (is_pae(vcpu)) { |
2763 | gpa &= ~(gpa_t)7; | 2765 | gpa &= ~(gpa_t)7; |
2764 | bytes = 8; | 2766 | bytes = 8; |
2765 | } | 2767 | } |
2766 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | 2768 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); |
2767 | if (r) | 2769 | if (r) |
2768 | gentry = 0; | 2770 | gentry = 0; |
2769 | new = (const u8 *)&gentry; | 2771 | new = (const u8 *)&gentry; |
2770 | } | 2772 | } |
2771 | 2773 | ||
2772 | switch (bytes) { | 2774 | switch (bytes) { |
2773 | case 4: | 2775 | case 4: |
2774 | gentry = *(const u32 *)new; | 2776 | gentry = *(const u32 *)new; |
2775 | break; | 2777 | break; |
2776 | case 8: | 2778 | case 8: |
2777 | gentry = *(const u64 *)new; | 2779 | gentry = *(const u64 *)new; |
2778 | break; | 2780 | break; |
2779 | default: | 2781 | default: |
2780 | gentry = 0; | 2782 | gentry = 0; |
2781 | break; | 2783 | break; |
2782 | } | 2784 | } |
2783 | 2785 | ||
2784 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | 2786 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); |
2785 | spin_lock(&vcpu->kvm->mmu_lock); | 2787 | spin_lock(&vcpu->kvm->mmu_lock); |
2786 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | 2788 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) |
2787 | gentry = 0; | 2789 | gentry = 0; |
2788 | kvm_mmu_access_page(vcpu, gfn); | 2790 | kvm_mmu_access_page(vcpu, gfn); |
2789 | kvm_mmu_free_some_pages(vcpu); | 2791 | kvm_mmu_free_some_pages(vcpu); |
2790 | ++vcpu->kvm->stat.mmu_pte_write; | 2792 | ++vcpu->kvm->stat.mmu_pte_write; |
2791 | kvm_mmu_audit(vcpu, "pre pte write"); | 2793 | kvm_mmu_audit(vcpu, "pre pte write"); |
2792 | if (guest_initiated) { | 2794 | if (guest_initiated) { |
2793 | if (gfn == vcpu->arch.last_pt_write_gfn | 2795 | if (gfn == vcpu->arch.last_pt_write_gfn |
2794 | && !last_updated_pte_accessed(vcpu)) { | 2796 | && !last_updated_pte_accessed(vcpu)) { |
2795 | ++vcpu->arch.last_pt_write_count; | 2797 | ++vcpu->arch.last_pt_write_count; |
2796 | if (vcpu->arch.last_pt_write_count >= 3) | 2798 | if (vcpu->arch.last_pt_write_count >= 3) |
2797 | flooded = 1; | 2799 | flooded = 1; |
2798 | } else { | 2800 | } else { |
2799 | vcpu->arch.last_pt_write_gfn = gfn; | 2801 | vcpu->arch.last_pt_write_gfn = gfn; |
2800 | vcpu->arch.last_pt_write_count = 1; | 2802 | vcpu->arch.last_pt_write_count = 1; |
2801 | vcpu->arch.last_pte_updated = NULL; | 2803 | vcpu->arch.last_pte_updated = NULL; |
2802 | } | 2804 | } |
2803 | } | 2805 | } |
2804 | 2806 | ||
2805 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 2807 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
2806 | pte_size = sp->role.cr4_pae ? 8 : 4; | 2808 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2807 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2809 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2808 | misaligned |= bytes < 4; | 2810 | misaligned |= bytes < 4; |
2809 | if (misaligned || flooded) { | 2811 | if (misaligned || flooded) { |
2810 | /* | 2812 | /* |
2811 | * Misaligned accesses are too much trouble to fix | 2813 | * Misaligned accesses are too much trouble to fix |
2812 | * up; also, they usually indicate a page is not used | 2814 | * up; also, they usually indicate a page is not used |
2813 | * as a page table. | 2815 | * as a page table. |
2814 | * | 2816 | * |
2815 | * If we're seeing too many writes to a page, | 2817 | * If we're seeing too many writes to a page, |
2816 | * it may no longer be a page table, or we may be | 2818 | * it may no longer be a page table, or we may be |
2817 | * forking, in which case it is better to unmap the | 2819 | * forking, in which case it is better to unmap the |
2818 | * page. | 2820 | * page. |
2819 | */ | 2821 | */ |
2820 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2822 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2821 | gpa, bytes, sp->role.word); | 2823 | gpa, bytes, sp->role.word); |
2822 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 2824 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2823 | &invalid_list); | 2825 | &invalid_list); |
2824 | ++vcpu->kvm->stat.mmu_flooded; | 2826 | ++vcpu->kvm->stat.mmu_flooded; |
2825 | continue; | 2827 | continue; |
2826 | } | 2828 | } |
2827 | page_offset = offset; | 2829 | page_offset = offset; |
2828 | level = sp->role.level; | 2830 | level = sp->role.level; |
2829 | npte = 1; | 2831 | npte = 1; |
2830 | if (!sp->role.cr4_pae) { | 2832 | if (!sp->role.cr4_pae) { |
2831 | page_offset <<= 1; /* 32->64 */ | 2833 | page_offset <<= 1; /* 32->64 */ |
2832 | /* | 2834 | /* |
2833 | * A 32-bit pde maps 4MB while the shadow pdes map | 2835 | * A 32-bit pde maps 4MB while the shadow pdes map |
2834 | * only 2MB. So we need to double the offset again | 2836 | * only 2MB. So we need to double the offset again |
2835 | * and zap two pdes instead of one. | 2837 | * and zap two pdes instead of one. |
2836 | */ | 2838 | */ |
2837 | if (level == PT32_ROOT_LEVEL) { | 2839 | if (level == PT32_ROOT_LEVEL) { |
2838 | page_offset &= ~7; /* kill rounding error */ | 2840 | page_offset &= ~7; /* kill rounding error */ |
2839 | page_offset <<= 1; | 2841 | page_offset <<= 1; |
2840 | npte = 2; | 2842 | npte = 2; |
2841 | } | 2843 | } |
2842 | quadrant = page_offset >> PAGE_SHIFT; | 2844 | quadrant = page_offset >> PAGE_SHIFT; |
2843 | page_offset &= ~PAGE_MASK; | 2845 | page_offset &= ~PAGE_MASK; |
2844 | if (quadrant != sp->role.quadrant) | 2846 | if (quadrant != sp->role.quadrant) |
2845 | continue; | 2847 | continue; |
2846 | } | 2848 | } |
2847 | local_flush = true; | 2849 | local_flush = true; |
2848 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2850 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2849 | while (npte--) { | 2851 | while (npte--) { |
2850 | entry = *spte; | 2852 | entry = *spte; |
2851 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2853 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2852 | if (gentry) | 2854 | if (gentry) |
2853 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 2855 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2854 | if (!remote_flush && need_remote_flush(entry, *spte)) | 2856 | if (!remote_flush && need_remote_flush(entry, *spte)) |
2855 | remote_flush = true; | 2857 | remote_flush = true; |
2856 | ++spte; | 2858 | ++spte; |
2857 | } | 2859 | } |
2858 | } | 2860 | } |
2859 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | 2861 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); |
2860 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 2862 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2861 | kvm_mmu_audit(vcpu, "post pte write"); | 2863 | kvm_mmu_audit(vcpu, "post pte write"); |
2862 | spin_unlock(&vcpu->kvm->mmu_lock); | 2864 | spin_unlock(&vcpu->kvm->mmu_lock); |
2863 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | 2865 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { |
2864 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); | 2866 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); |
2865 | vcpu->arch.update_pte.pfn = bad_pfn; | 2867 | vcpu->arch.update_pte.pfn = bad_pfn; |
2866 | } | 2868 | } |
2867 | } | 2869 | } |
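For non-PAE guests, the offset arithmetic in the loop above is worth a worked example: a 4-byte guest pte maps onto an 8-byte shadow pte, so the byte offset is doubled, and any overflow past one page selects which quadrant shadow page the write actually hits; at the 32-bit root level the offset is doubled a second time and two shadow pdes are zapped, because one 4MB guest pde is backed by two 2MB shadow pdes. The sketch below redoes that arithmetic in user space with stand-in constants; it is not kernel code.

/* Worked example of the offset/quadrant arithmetic used above for
 * non-PAE guests.  User-space sketch with stand-in constants.
 */
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1u << PAGE_SHIFT)
#define PT32_ROOT_LEVEL 2                  /* non-PAE page directory level  */

struct hit {
        unsigned quadrant;                 /* which shadow copy is touched  */
        unsigned first_index;              /* first spte index in that page */
        unsigned npte;                     /* how many sptes are zapped     */
};

static struct hit locate(unsigned offset, int level)
{
        struct hit h = { .npte = 1 };

        offset <<= 1;                      /* 4-byte gpte -> 8-byte spte    */
        if (level == PT32_ROOT_LEVEL) {
                offset &= ~7u;             /* align to one guest pde        */
                offset <<= 1;              /* one 4MB pde -> two 2MB pdes   */
                h.npte = 2;
        }
        h.quadrant = offset >> PAGE_SHIFT;
        h.first_index = (offset & (PAGE_SIZE - 1)) / 8;
        return h;
}

int main(void)
{
        /* Guest writes the 4-byte pte at byte offset 0xc04 (index 769) of a
         * leaf page table: it lands at spte 257 of the quadrant-1 shadow
         * page, since 769 = 512 + 257.
         */
        struct hit pt = locate(0xc04, 1);
        /* The same offset in a non-PAE page directory hits two shadow pdes
         * in quadrant 3, because each 4MB guest pde is split in two.
         */
        struct hit pd = locate(0xc04, PT32_ROOT_LEVEL);

        printf("pte write: quadrant %u, index %u, npte %u\n",
               pt.quadrant, pt.first_index, pt.npte);
        printf("pde write: quadrant %u, index %u, npte %u\n",
               pd.quadrant, pd.first_index, pd.npte);
        return 0;
}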
2868 | 2870 | ||
2869 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | 2871 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
2870 | { | 2872 | { |
2871 | gpa_t gpa; | 2873 | gpa_t gpa; |
2872 | int r; | 2874 | int r; |
2873 | 2875 | ||
2874 | if (tdp_enabled) | 2876 | if (tdp_enabled) |
2875 | return 0; | 2877 | return 0; |
2876 | 2878 | ||
2877 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | 2879 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
2878 | 2880 | ||
2879 | spin_lock(&vcpu->kvm->mmu_lock); | 2881 | spin_lock(&vcpu->kvm->mmu_lock); |
2880 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 2882 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
2881 | spin_unlock(&vcpu->kvm->mmu_lock); | 2883 | spin_unlock(&vcpu->kvm->mmu_lock); |
2882 | return r; | 2884 | return r; |
2883 | } | 2885 | } |
2884 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 2886 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
2885 | 2887 | ||
2886 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 2888 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
2887 | { | 2889 | { |
2888 | int free_pages; | 2890 | int free_pages; |
2889 | LIST_HEAD(invalid_list); | 2891 | LIST_HEAD(invalid_list); |
2890 | 2892 | ||
2891 | free_pages = vcpu->kvm->arch.n_free_mmu_pages; | 2893 | free_pages = vcpu->kvm->arch.n_free_mmu_pages; |
2892 | while (free_pages < KVM_REFILL_PAGES && | 2894 | while (free_pages < KVM_REFILL_PAGES && |
2893 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 2895 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { |
2894 | struct kvm_mmu_page *sp; | 2896 | struct kvm_mmu_page *sp; |
2895 | 2897 | ||
2896 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | 2898 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, |
2897 | struct kvm_mmu_page, link); | 2899 | struct kvm_mmu_page, link); |
2898 | free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 2900 | free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2899 | &invalid_list); | 2901 | &invalid_list); |
2900 | ++vcpu->kvm->stat.mmu_recycled; | 2902 | ++vcpu->kvm->stat.mmu_recycled; |
2901 | } | 2903 | } |
2902 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 2904 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
2903 | } | 2905 | } |
2904 | 2906 | ||
2905 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 2907 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
2906 | { | 2908 | { |
2907 | int r; | 2909 | int r; |
2908 | enum emulation_result er; | 2910 | enum emulation_result er; |
2909 | 2911 | ||
2910 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); | 2912 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code); |
2911 | if (r < 0) | 2913 | if (r < 0) |
2912 | goto out; | 2914 | goto out; |
2913 | 2915 | ||
2914 | if (!r) { | 2916 | if (!r) { |
2915 | r = 1; | 2917 | r = 1; |
2916 | goto out; | 2918 | goto out; |
2917 | } | 2919 | } |
2918 | 2920 | ||
2919 | r = mmu_topup_memory_caches(vcpu); | 2921 | r = mmu_topup_memory_caches(vcpu); |
2920 | if (r) | 2922 | if (r) |
2921 | goto out; | 2923 | goto out; |
2922 | 2924 | ||
2923 | er = emulate_instruction(vcpu, cr2, error_code, 0); | 2925 | er = emulate_instruction(vcpu, cr2, error_code, 0); |
2924 | 2926 | ||
2925 | switch (er) { | 2927 | switch (er) { |
2926 | case EMULATE_DONE: | 2928 | case EMULATE_DONE: |
2927 | return 1; | 2929 | return 1; |
2928 | case EMULATE_DO_MMIO: | 2930 | case EMULATE_DO_MMIO: |
2929 | ++vcpu->stat.mmio_exits; | 2931 | ++vcpu->stat.mmio_exits; |
2930 | /* fall through */ | 2932 | /* fall through */ |
2931 | case EMULATE_FAIL: | 2933 | case EMULATE_FAIL: |
2932 | return 0; | 2934 | return 0; |
2933 | default: | 2935 | default: |
2934 | BUG(); | 2936 | BUG(); |
2935 | } | 2937 | } |
2936 | out: | 2938 | out: |
2937 | return r; | 2939 | return r; |
2938 | } | 2940 | } |
2939 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | 2941 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |
2940 | 2942 | ||
2941 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | 2943 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) |
2942 | { | 2944 | { |
2943 | vcpu->arch.mmu.invlpg(vcpu, gva); | 2945 | vcpu->arch.mmu.invlpg(vcpu, gva); |
2944 | kvm_mmu_flush_tlb(vcpu); | 2946 | kvm_mmu_flush_tlb(vcpu); |
2945 | ++vcpu->stat.invlpg; | 2947 | ++vcpu->stat.invlpg; |
2946 | } | 2948 | } |
2947 | EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); | 2949 | EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); |
2948 | 2950 | ||
2949 | void kvm_enable_tdp(void) | 2951 | void kvm_enable_tdp(void) |
2950 | { | 2952 | { |
2951 | tdp_enabled = true; | 2953 | tdp_enabled = true; |
2952 | } | 2954 | } |
2953 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); | 2955 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); |
2954 | 2956 | ||
2955 | void kvm_disable_tdp(void) | 2957 | void kvm_disable_tdp(void) |
2956 | { | 2958 | { |
2957 | tdp_enabled = false; | 2959 | tdp_enabled = false; |
2958 | } | 2960 | } |
2959 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | 2961 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); |
2960 | 2962 | ||
2961 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 2963 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
2962 | { | 2964 | { |
2963 | free_page((unsigned long)vcpu->arch.mmu.pae_root); | 2965 | free_page((unsigned long)vcpu->arch.mmu.pae_root); |
2964 | } | 2966 | } |
2965 | 2967 | ||
2966 | static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | 2968 | static int alloc_mmu_pages(struct kvm_vcpu *vcpu) |
2967 | { | 2969 | { |
2968 | struct page *page; | 2970 | struct page *page; |
2969 | int i; | 2971 | int i; |
2970 | 2972 | ||
2971 | ASSERT(vcpu); | 2973 | ASSERT(vcpu); |
2972 | 2974 | ||
2973 | /* | 2975 | /* |
2974 | * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. | 2976 | * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. |
2975 | * Therefore we need to allocate shadow page tables in the first | 2977 | * Therefore we need to allocate shadow page tables in the first |
2976 | * 4GB of memory, which happens to fit the DMA32 zone. | 2978 | * 4GB of memory, which happens to fit the DMA32 zone. |
2977 | */ | 2979 | */ |
2978 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | 2980 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); |
2979 | if (!page) | 2981 | if (!page) |
2980 | return -ENOMEM; | 2982 | return -ENOMEM; |
2981 | 2983 | ||
2982 | vcpu->arch.mmu.pae_root = page_address(page); | 2984 | vcpu->arch.mmu.pae_root = page_address(page); |
2983 | for (i = 0; i < 4; ++i) | 2985 | for (i = 0; i < 4; ++i) |
2984 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2986 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2985 | 2987 | ||
2986 | return 0; | 2988 | return 0; |
2987 | } | 2989 | } |
2988 | 2990 | ||
2989 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 2991 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
2990 | { | 2992 | { |
2991 | ASSERT(vcpu); | 2993 | ASSERT(vcpu); |
2992 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 2994 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
2993 | 2995 | ||
2994 | return alloc_mmu_pages(vcpu); | 2996 | return alloc_mmu_pages(vcpu); |
2995 | } | 2997 | } |
2996 | 2998 | ||
2997 | int kvm_mmu_setup(struct kvm_vcpu *vcpu) | 2999 | int kvm_mmu_setup(struct kvm_vcpu *vcpu) |
2998 | { | 3000 | { |
2999 | ASSERT(vcpu); | 3001 | ASSERT(vcpu); |
3000 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 3002 | ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
3001 | 3003 | ||
3002 | return init_kvm_mmu(vcpu); | 3004 | return init_kvm_mmu(vcpu); |
3003 | } | 3005 | } |
3004 | 3006 | ||
3005 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) | 3007 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu) |
3006 | { | 3008 | { |
3007 | ASSERT(vcpu); | 3009 | ASSERT(vcpu); |
3008 | 3010 | ||
3009 | destroy_kvm_mmu(vcpu); | 3011 | destroy_kvm_mmu(vcpu); |
3010 | free_mmu_pages(vcpu); | 3012 | free_mmu_pages(vcpu); |
3011 | mmu_free_memory_caches(vcpu); | 3013 | mmu_free_memory_caches(vcpu); |
3012 | } | 3014 | } |
3013 | 3015 | ||
3014 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | 3016 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) |
3015 | { | 3017 | { |
3016 | struct kvm_mmu_page *sp; | 3018 | struct kvm_mmu_page *sp; |
3017 | 3019 | ||
3018 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 3020 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
3019 | int i; | 3021 | int i; |
3020 | u64 *pt; | 3022 | u64 *pt; |
3021 | 3023 | ||
3022 | if (!test_bit(slot, sp->slot_bitmap)) | 3024 | if (!test_bit(slot, sp->slot_bitmap)) |
3023 | continue; | 3025 | continue; |
3024 | 3026 | ||
3025 | pt = sp->spt; | 3027 | pt = sp->spt; |
3026 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3028 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
3027 | /* avoid RMW */ | 3029 | /* avoid RMW */ |
3028 | if (is_writable_pte(pt[i])) | 3030 | if (is_writable_pte(pt[i])) |
3029 | pt[i] &= ~PT_WRITABLE_MASK; | 3031 | pt[i] &= ~PT_WRITABLE_MASK; |
3030 | } | 3032 | } |
3031 | kvm_flush_remote_tlbs(kvm); | 3033 | kvm_flush_remote_tlbs(kvm); |
3032 | } | 3034 | } |
3033 | 3035 | ||
3034 | void kvm_mmu_zap_all(struct kvm *kvm) | 3036 | void kvm_mmu_zap_all(struct kvm *kvm) |
3035 | { | 3037 | { |
3036 | struct kvm_mmu_page *sp, *node; | 3038 | struct kvm_mmu_page *sp, *node; |
3037 | LIST_HEAD(invalid_list); | 3039 | LIST_HEAD(invalid_list); |
3038 | 3040 | ||
3039 | spin_lock(&kvm->mmu_lock); | 3041 | spin_lock(&kvm->mmu_lock); |
3040 | restart: | 3042 | restart: |
3041 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 3043 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
3042 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) | 3044 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) |
3043 | goto restart; | 3045 | goto restart; |
3044 | 3046 | ||
3045 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 3047 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
3046 | spin_unlock(&kvm->mmu_lock); | 3048 | spin_unlock(&kvm->mmu_lock); |
3047 | } | 3049 | } |
3048 | 3050 | ||
3049 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 3051 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
3050 | struct list_head *invalid_list) | 3052 | struct list_head *invalid_list) |
3051 | { | 3053 | { |
3052 | struct kvm_mmu_page *page; | 3054 | struct kvm_mmu_page *page; |
3053 | 3055 | ||
3054 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3056 | page = container_of(kvm->arch.active_mmu_pages.prev, |
3055 | struct kvm_mmu_page, link); | 3057 | struct kvm_mmu_page, link); |
3056 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | 3058 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
3057 | } | 3059 | } |
3058 | 3060 | ||
3059 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 3061 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
3060 | { | 3062 | { |
3061 | struct kvm *kvm; | 3063 | struct kvm *kvm; |
3062 | struct kvm *kvm_freed = NULL; | 3064 | struct kvm *kvm_freed = NULL; |
3063 | int cache_count = 0; | 3065 | int cache_count = 0; |
3064 | 3066 | ||
3065 | spin_lock(&kvm_lock); | 3067 | spin_lock(&kvm_lock); |
3066 | 3068 | ||
3067 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3069 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3068 | int npages, idx, freed_pages; | 3070 | int npages, idx, freed_pages; |
3069 | LIST_HEAD(invalid_list); | 3071 | LIST_HEAD(invalid_list); |
3070 | 3072 | ||
3071 | idx = srcu_read_lock(&kvm->srcu); | 3073 | idx = srcu_read_lock(&kvm->srcu); |
3072 | spin_lock(&kvm->mmu_lock); | 3074 | spin_lock(&kvm->mmu_lock); |
3073 | npages = kvm->arch.n_alloc_mmu_pages - | 3075 | npages = kvm->arch.n_alloc_mmu_pages - |
3074 | kvm->arch.n_free_mmu_pages; | 3076 | kvm->arch.n_free_mmu_pages; |
3075 | cache_count += npages; | 3077 | cache_count += npages; |
3076 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 3078 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
3077 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, | 3079 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
3078 | &invalid_list); | 3080 | &invalid_list); |
3079 | cache_count -= freed_pages; | 3081 | cache_count -= freed_pages; |
3080 | kvm_freed = kvm; | 3082 | kvm_freed = kvm; |
3081 | } | 3083 | } |
3082 | nr_to_scan--; | 3084 | nr_to_scan--; |
3083 | 3085 | ||
3084 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 3086 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
3085 | spin_unlock(&kvm->mmu_lock); | 3087 | spin_unlock(&kvm->mmu_lock); |
3086 | srcu_read_unlock(&kvm->srcu, idx); | 3088 | srcu_read_unlock(&kvm->srcu, idx); |
3087 | } | 3089 | } |
3088 | if (kvm_freed) | 3090 | if (kvm_freed) |
3089 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 3091 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
3090 | 3092 | ||
3091 | spin_unlock(&kvm_lock); | 3093 | spin_unlock(&kvm_lock); |
3092 | 3094 | ||
3093 | return cache_count; | 3095 | return cache_count; |
3094 | } | 3096 | } |
3095 | 3097 | ||
3096 | static struct shrinker mmu_shrinker = { | 3098 | static struct shrinker mmu_shrinker = { |
3097 | .shrink = mmu_shrink, | 3099 | .shrink = mmu_shrink, |
3098 | .seeks = DEFAULT_SEEKS * 10, | 3100 | .seeks = DEFAULT_SEEKS * 10, |
3099 | }; | 3101 | }; |
3100 | 3102 | ||
3101 | static void mmu_destroy_caches(void) | 3103 | static void mmu_destroy_caches(void) |
3102 | { | 3104 | { |
3103 | if (pte_chain_cache) | 3105 | if (pte_chain_cache) |
3104 | kmem_cache_destroy(pte_chain_cache); | 3106 | kmem_cache_destroy(pte_chain_cache); |
3105 | if (rmap_desc_cache) | 3107 | if (rmap_desc_cache) |
3106 | kmem_cache_destroy(rmap_desc_cache); | 3108 | kmem_cache_destroy(rmap_desc_cache); |
3107 | if (mmu_page_header_cache) | 3109 | if (mmu_page_header_cache) |
3108 | kmem_cache_destroy(mmu_page_header_cache); | 3110 | kmem_cache_destroy(mmu_page_header_cache); |
3109 | } | 3111 | } |
3110 | 3112 | ||
3111 | void kvm_mmu_module_exit(void) | 3113 | void kvm_mmu_module_exit(void) |
3112 | { | 3114 | { |
3113 | mmu_destroy_caches(); | 3115 | mmu_destroy_caches(); |
3114 | unregister_shrinker(&mmu_shrinker); | 3116 | unregister_shrinker(&mmu_shrinker); |
3115 | } | 3117 | } |
3116 | 3118 | ||
3117 | int kvm_mmu_module_init(void) | 3119 | int kvm_mmu_module_init(void) |
3118 | { | 3120 | { |
3119 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", | 3121 | pte_chain_cache = kmem_cache_create("kvm_pte_chain", |
3120 | sizeof(struct kvm_pte_chain), | 3122 | sizeof(struct kvm_pte_chain), |
3121 | 0, 0, NULL); | 3123 | 0, 0, NULL); |
3122 | if (!pte_chain_cache) | 3124 | if (!pte_chain_cache) |
3123 | goto nomem; | 3125 | goto nomem; |
3124 | rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", | 3126 | rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", |
3125 | sizeof(struct kvm_rmap_desc), | 3127 | sizeof(struct kvm_rmap_desc), |
3126 | 0, 0, NULL); | 3128 | 0, 0, NULL); |
3127 | if (!rmap_desc_cache) | 3129 | if (!rmap_desc_cache) |
3128 | goto nomem; | 3130 | goto nomem; |
3129 | 3131 | ||
3130 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", | 3132 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", |
3131 | sizeof(struct kvm_mmu_page), | 3133 | sizeof(struct kvm_mmu_page), |
3132 | 0, 0, NULL); | 3134 | 0, 0, NULL); |
3133 | if (!mmu_page_header_cache) | 3135 | if (!mmu_page_header_cache) |
3134 | goto nomem; | 3136 | goto nomem; |
3135 | 3137 | ||
3136 | register_shrinker(&mmu_shrinker); | 3138 | register_shrinker(&mmu_shrinker); |
3137 | 3139 | ||
3138 | return 0; | 3140 | return 0; |
3139 | 3141 | ||
3140 | nomem: | 3142 | nomem: |
3141 | mmu_destroy_caches(); | 3143 | mmu_destroy_caches(); |
3142 | return -ENOMEM; | 3144 | return -ENOMEM; |
3143 | } | 3145 | } |
3144 | 3146 | ||
3145 | /* | 3147 | /* |
3146 | * Calculate mmu pages needed for kvm. | 3148 | * Calculate mmu pages needed for kvm. |
3147 | */ | 3149 | */ |
3148 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | 3150 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) |
3149 | { | 3151 | { |
3150 | int i; | 3152 | int i; |
3151 | unsigned int nr_mmu_pages; | 3153 | unsigned int nr_mmu_pages; |
3152 | unsigned int nr_pages = 0; | 3154 | unsigned int nr_pages = 0; |
3153 | struct kvm_memslots *slots; | 3155 | struct kvm_memslots *slots; |
3154 | 3156 | ||
3155 | slots = kvm_memslots(kvm); | 3157 | slots = kvm_memslots(kvm); |
3156 | 3158 | ||
3157 | for (i = 0; i < slots->nmemslots; i++) | 3159 | for (i = 0; i < slots->nmemslots; i++) |
3158 | nr_pages += slots->memslots[i].npages; | 3160 | nr_pages += slots->memslots[i].npages; |
3159 | 3161 | ||
3160 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3162 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
3161 | nr_mmu_pages = max(nr_mmu_pages, | 3163 | nr_mmu_pages = max(nr_mmu_pages, |
3162 | (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); | 3164 | (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); |
3163 | 3165 | ||
3164 | return nr_mmu_pages; | 3166 | return nr_mmu_pages; |
3165 | } | 3167 | } |
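kvm_mmu_calculate_mmu_pages() above sizes the shadow-page budget as a fixed per-mille fraction of guest memory with a floor. As a rough sketch (assuming KVM_PERMILLE_MMU_PAGES is 20 and KVM_MIN_ALLOC_MMU_PAGES is 64, their usual values in this era of the tree; neither constant appears in this diff), a 4 GB guest with 1,048,576 4 KB pages gets about 20,971 shadow pages, while an 8 MB guest falls back to the 64-page floor.

/* The sizing heuristic from kvm_mmu_calculate_mmu_pages(), redone as a
 * user-space sketch.  The two constants are assumed values, not taken
 * from this diff.
 */
#include <stdio.h>

#define KVM_PERMILLE_MMU_PAGES  20         /* assumed: 2% of guest pages */
#define KVM_MIN_ALLOC_MMU_PAGES 64         /* assumed floor              */

static unsigned int mmu_pages_for(unsigned long guest_pages)
{
        unsigned long n = guest_pages * KVM_PERMILLE_MMU_PAGES / 1000;

        return n < KVM_MIN_ALLOC_MMU_PAGES ? KVM_MIN_ALLOC_MMU_PAGES : n;
}

int main(void)
{
        /* 4 GB guest = 1,048,576 4 KB pages -> about 20,971 shadow pages */
        printf("%u\n", mmu_pages_for(1048576ul));
        /* 8 MB guest = 2,048 pages -> only 40, so the 64-page floor wins */
        printf("%u\n", mmu_pages_for(2048ul));
        return 0;
}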
3166 | 3168 | ||
3167 | static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, | 3169 | static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer, |
3168 | unsigned len) | 3170 | unsigned len) |
3169 | { | 3171 | { |
3170 | if (len > buffer->len) | 3172 | if (len > buffer->len) |
3171 | return NULL; | 3173 | return NULL; |
3172 | return buffer->ptr; | 3174 | return buffer->ptr; |
3173 | } | 3175 | } |
3174 | 3176 | ||
3175 | static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, | 3177 | static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer, |
3176 | unsigned len) | 3178 | unsigned len) |
3177 | { | 3179 | { |
3178 | void *ret; | 3180 | void *ret; |
3179 | 3181 | ||
3180 | ret = pv_mmu_peek_buffer(buffer, len); | 3182 | ret = pv_mmu_peek_buffer(buffer, len); |
3181 | if (!ret) | 3183 | if (!ret) |
3182 | return ret; | 3184 | return ret; |
3183 | buffer->ptr += len; | 3185 | buffer->ptr += len; |
3184 | buffer->len -= len; | 3186 | buffer->len -= len; |
3185 | buffer->processed += len; | 3187 | buffer->processed += len; |
3186 | return ret; | 3188 | return ret; |
3187 | } | 3189 | } |
3188 | 3190 | ||
3189 | static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | 3191 | static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, |
3190 | gpa_t addr, gpa_t value) | 3192 | gpa_t addr, gpa_t value) |
3191 | { | 3193 | { |
3192 | int bytes = 8; | 3194 | int bytes = 8; |
3193 | int r; | 3195 | int r; |
3194 | 3196 | ||
3195 | if (!is_long_mode(vcpu) && !is_pae(vcpu)) | 3197 | if (!is_long_mode(vcpu) && !is_pae(vcpu)) |
3196 | bytes = 4; | 3198 | bytes = 4; |
3197 | 3199 | ||
3198 | r = mmu_topup_memory_caches(vcpu); | 3200 | r = mmu_topup_memory_caches(vcpu); |
3199 | if (r) | 3201 | if (r) |
3200 | return r; | 3202 | return r; |
3201 | 3203 | ||
3202 | if (!emulator_write_phys(vcpu, addr, &value, bytes)) | 3204 | if (!emulator_write_phys(vcpu, addr, &value, bytes)) |
3203 | return -EFAULT; | 3205 | return -EFAULT; |
3204 | 3206 | ||
3205 | return 1; | 3207 | return 1; |
3206 | } | 3208 | } |
3207 | 3209 | ||
3208 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 3210 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
3209 | { | 3211 | { |
3210 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); | 3212 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); |
3211 | return 1; | 3213 | return 1; |
3212 | } | 3214 | } |
3213 | 3215 | ||
3214 | static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) | 3216 | static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) |
3215 | { | 3217 | { |
3216 | spin_lock(&vcpu->kvm->mmu_lock); | 3218 | spin_lock(&vcpu->kvm->mmu_lock); |
3217 | mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); | 3219 | mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); |
3218 | spin_unlock(&vcpu->kvm->mmu_lock); | 3220 | spin_unlock(&vcpu->kvm->mmu_lock); |
3219 | return 1; | 3221 | return 1; |
3220 | } | 3222 | } |
3221 | 3223 | ||
3222 | static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, | 3224 | static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu, |
3223 | struct kvm_pv_mmu_op_buffer *buffer) | 3225 | struct kvm_pv_mmu_op_buffer *buffer) |
3224 | { | 3226 | { |
3225 | struct kvm_mmu_op_header *header; | 3227 | struct kvm_mmu_op_header *header; |
3226 | 3228 | ||
3227 | header = pv_mmu_peek_buffer(buffer, sizeof *header); | 3229 | header = pv_mmu_peek_buffer(buffer, sizeof *header); |
3228 | if (!header) | 3230 | if (!header) |
3229 | return 0; | 3231 | return 0; |
3230 | switch (header->op) { | 3232 | switch (header->op) { |
3231 | case KVM_MMU_OP_WRITE_PTE: { | 3233 | case KVM_MMU_OP_WRITE_PTE: { |
3232 | struct kvm_mmu_op_write_pte *wpte; | 3234 | struct kvm_mmu_op_write_pte *wpte; |
3233 | 3235 | ||
3234 | wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); | 3236 | wpte = pv_mmu_read_buffer(buffer, sizeof *wpte); |
3235 | if (!wpte) | 3237 | if (!wpte) |
3236 | return 0; | 3238 | return 0; |
3237 | return kvm_pv_mmu_write(vcpu, wpte->pte_phys, | 3239 | return kvm_pv_mmu_write(vcpu, wpte->pte_phys, |
3238 | wpte->pte_val); | 3240 | wpte->pte_val); |
3239 | } | 3241 | } |
3240 | case KVM_MMU_OP_FLUSH_TLB: { | 3242 | case KVM_MMU_OP_FLUSH_TLB: { |
3241 | struct kvm_mmu_op_flush_tlb *ftlb; | 3243 | struct kvm_mmu_op_flush_tlb *ftlb; |
3242 | 3244 | ||
3243 | ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); | 3245 | ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb); |
3244 | if (!ftlb) | 3246 | if (!ftlb) |
3245 | return 0; | 3247 | return 0; |
3246 | return kvm_pv_mmu_flush_tlb(vcpu); | 3248 | return kvm_pv_mmu_flush_tlb(vcpu); |
3247 | } | 3249 | } |
3248 | case KVM_MMU_OP_RELEASE_PT: { | 3250 | case KVM_MMU_OP_RELEASE_PT: { |
3249 | struct kvm_mmu_op_release_pt *rpt; | 3251 | struct kvm_mmu_op_release_pt *rpt; |
3250 | 3252 | ||
3251 | rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); | 3253 | rpt = pv_mmu_read_buffer(buffer, sizeof *rpt); |
3252 | if (!rpt) | 3254 | if (!rpt) |
3253 | return 0; | 3255 | return 0; |
3254 | return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); | 3256 | return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys); |
3255 | } | 3257 | } |
3256 | default: return 0; | 3258 | default: return 0; |
3257 | } | 3259 | } |
3258 | } | 3260 | } |
3259 | 3261 | ||
3260 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | 3262 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, |
3261 | gpa_t addr, unsigned long *ret) | 3263 | gpa_t addr, unsigned long *ret) |
3262 | { | 3264 | { |
3263 | int r; | 3265 | int r; |
3264 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; | 3266 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; |
3265 | 3267 | ||
3266 | buffer->ptr = buffer->buf; | 3268 | buffer->ptr = buffer->buf; |
3267 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); | 3269 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); |
3268 | buffer->processed = 0; | 3270 | buffer->processed = 0; |
3269 | 3271 | ||
3270 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); | 3272 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); |
3271 | if (r) | 3273 | if (r) |
3272 | goto out; | 3274 | goto out; |
3273 | 3275 | ||
3274 | while (buffer->len) { | 3276 | while (buffer->len) { |
3275 | r = kvm_pv_mmu_op_one(vcpu, buffer); | 3277 | r = kvm_pv_mmu_op_one(vcpu, buffer); |
3276 | if (r < 0) | 3278 | if (r < 0) |
3277 | goto out; | 3279 | goto out; |
3278 | if (r == 0) | 3280 | if (r == 0) |
3279 | break; | 3281 | break; |
3280 | } | 3282 | } |
3281 | 3283 | ||
3282 | r = 1; | 3284 | r = 1; |
3283 | out: | 3285 | out: |
3284 | *ret = buffer->processed; | 3286 | *ret = buffer->processed; |
3285 | return r; | 3287 | return r; |
3286 | } | 3288 | } |
3287 | 3289 | ||
3288 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | 3290 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) |
3289 | { | 3291 | { |
3290 | struct kvm_shadow_walk_iterator iterator; | 3292 | struct kvm_shadow_walk_iterator iterator; |
3291 | int nr_sptes = 0; | 3293 | int nr_sptes = 0; |
3292 | 3294 | ||
3293 | spin_lock(&vcpu->kvm->mmu_lock); | 3295 | spin_lock(&vcpu->kvm->mmu_lock); |
3294 | for_each_shadow_entry(vcpu, addr, iterator) { | 3296 | for_each_shadow_entry(vcpu, addr, iterator) { |
3295 | sptes[iterator.level-1] = *iterator.sptep; | 3297 | sptes[iterator.level-1] = *iterator.sptep; |
3296 | nr_sptes++; | 3298 | nr_sptes++; |
3297 | if (!is_shadow_present_pte(*iterator.sptep)) | 3299 | if (!is_shadow_present_pte(*iterator.sptep)) |
3298 | break; | 3300 | break; |
3299 | } | 3301 | } |
3300 | spin_unlock(&vcpu->kvm->mmu_lock); | 3302 | spin_unlock(&vcpu->kvm->mmu_lock); |
3301 | 3303 | ||
3302 | return nr_sptes; | 3304 | return nr_sptes; |
3303 | } | 3305 | } |
3304 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); | 3306 | EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); |
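kvm_mmu_get_spte_hierarchy() stores the spte seen at each level of the shadow walk into sptes[level - 1] and returns how many levels it visited before reaching a non-present entry; callers such as the VMX EPT-misconfiguration handler use it to dump the faulting hierarchy. A usage sketch follows; the dump helper and the sample values are made up for illustration and assume a 4-level walk.

/* Sketch of how a caller might dump the array filled in by
 * kvm_mmu_get_spte_hierarchy(); the sample values are made up.
 */
#include <stdint.h>
#include <stdio.h>

static void dump_spte_hierarchy(const uint64_t sptes[4], int nr_sptes)
{
        int level;

        /* Entry [level - 1] holds the spte seen at that level, so the walk
         * starts at index 3 (level 4) and nr_sptes entries were filled
         * counting down from the top.
         */
        for (level = 4; level > 4 - nr_sptes; --level)
                printf("level %d: spte %#llx\n", level,
                       (unsigned long long)sptes[level - 1]);
}

int main(void)
{
        /* Pretend the walk stopped after two levels (top two filled). */
        uint64_t sptes[4] = { 0, 0, 0x12345067, 0x23456007 };

        dump_spte_hierarchy(sptes, 2);
        return 0;
}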
3305 | 3307 | ||
3306 | #ifdef AUDIT | 3308 | #ifdef AUDIT |
3307 | 3309 | ||
3308 | static const char *audit_msg; | 3310 | static const char *audit_msg; |
3309 | 3311 | ||
3310 | static gva_t canonicalize(gva_t gva) | 3312 | static gva_t canonicalize(gva_t gva) |
3311 | { | 3313 | { |
3312 | #ifdef CONFIG_X86_64 | 3314 | #ifdef CONFIG_X86_64 |
3313 | gva = (long long)(gva << 16) >> 16; | 3315 | gva = (long long)(gva << 16) >> 16; |
3314 | #endif | 3316 | #endif |
3315 | return gva; | 3317 | return gva; |
3316 | } | 3318 | } |
3317 | 3319 | ||
3318 | 3320 | ||
3319 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); | 3321 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
3320 | 3322 | ||
3321 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3323 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
3322 | inspect_spte_fn fn) | 3324 | inspect_spte_fn fn) |
3323 | { | 3325 | { |
3324 | int i; | 3326 | int i; |
3325 | 3327 | ||
3326 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 3328 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
3327 | u64 ent = sp->spt[i]; | 3329 | u64 ent = sp->spt[i]; |
3328 | 3330 | ||
3329 | if (is_shadow_present_pte(ent)) { | 3331 | if (is_shadow_present_pte(ent)) { |
3330 | if (!is_last_spte(ent, sp->role.level)) { | 3332 | if (!is_last_spte(ent, sp->role.level)) { |
3331 | struct kvm_mmu_page *child; | 3333 | struct kvm_mmu_page *child; |
3332 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3334 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
3333 | __mmu_spte_walk(kvm, child, fn); | 3335 | __mmu_spte_walk(kvm, child, fn); |
3334 | } else | 3336 | } else |
3335 | fn(kvm, &sp->spt[i]); | 3337 | fn(kvm, &sp->spt[i]); |
3336 | } | 3338 | } |
3337 | } | 3339 | } |
3338 | } | 3340 | } |
3339 | 3341 | ||
3340 | static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) | 3342 | static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) |
3341 | { | 3343 | { |
3342 | int i; | 3344 | int i; |
3343 | struct kvm_mmu_page *sp; | 3345 | struct kvm_mmu_page *sp; |
3344 | 3346 | ||
3345 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3347 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
3346 | return; | 3348 | return; |
3347 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 3349 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
3348 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3350 | hpa_t root = vcpu->arch.mmu.root_hpa; |
3349 | sp = page_header(root); | 3351 | sp = page_header(root); |
3350 | __mmu_spte_walk(vcpu->kvm, sp, fn); | 3352 | __mmu_spte_walk(vcpu->kvm, sp, fn); |
3351 | return; | 3353 | return; |
3352 | } | 3354 | } |
3353 | for (i = 0; i < 4; ++i) { | 3355 | for (i = 0; i < 4; ++i) { |
3354 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 3356 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
3355 | 3357 | ||
3356 | if (root && VALID_PAGE(root)) { | 3358 | if (root && VALID_PAGE(root)) { |
3357 | root &= PT64_BASE_ADDR_MASK; | 3359 | root &= PT64_BASE_ADDR_MASK; |
3358 | sp = page_header(root); | 3360 | sp = page_header(root); |
3359 | __mmu_spte_walk(vcpu->kvm, sp, fn); | 3361 | __mmu_spte_walk(vcpu->kvm, sp, fn); |
3360 | } | 3362 | } |
3361 | } | 3363 | } |
3362 | return; | 3364 | return; |
3363 | } | 3365 | } |
3364 | 3366 | ||
3365 | static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | 3367 | static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, |
3366 | gva_t va, int level) | 3368 | gva_t va, int level) |
3367 | { | 3369 | { |
3368 | u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK); | 3370 | u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK); |
3369 | int i; | 3371 | int i; |
3370 | gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1)); | 3372 | gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1)); |
3371 | 3373 | ||
3372 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { | 3374 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) { |
3373 | u64 ent = pt[i]; | 3375 | u64 ent = pt[i]; |
3374 | 3376 | ||
3375 | if (ent == shadow_trap_nonpresent_pte) | 3377 | if (ent == shadow_trap_nonpresent_pte) |
3376 | continue; | 3378 | continue; |
3377 | 3379 | ||
3378 | va = canonicalize(va); | 3380 | va = canonicalize(va); |
3379 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) | 3381 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) |
3380 | audit_mappings_page(vcpu, ent, va, level - 1); | 3382 | audit_mappings_page(vcpu, ent, va, level - 1); |
3381 | else { | 3383 | else { |
3382 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); | 3384 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); |
3383 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3385 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3384 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | 3386 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); |
3385 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | 3387 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; |
3386 | 3388 | ||
3387 | if (is_error_pfn(pfn)) { | 3389 | if (is_error_pfn(pfn)) { |
3388 | kvm_release_pfn_clean(pfn); | 3390 | kvm_release_pfn_clean(pfn); |
3389 | continue; | 3391 | continue; |
3390 | } | 3392 | } |
3391 | 3393 | ||
3392 | if (is_shadow_present_pte(ent) | 3394 | if (is_shadow_present_pte(ent) |
3393 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | 3395 | && (ent & PT64_BASE_ADDR_MASK) != hpa) |
3394 | printk(KERN_ERR "xx audit error: (%s) levels %d" | 3396 | printk(KERN_ERR "xx audit error: (%s) levels %d" |
3395 | " gva %lx gpa %llx hpa %llx ent %llx %d\n", | 3397 | " gva %lx gpa %llx hpa %llx ent %llx %d\n", |
3396 | audit_msg, vcpu->arch.mmu.root_level, | 3398 | audit_msg, vcpu->arch.mmu.root_level, |
3397 | va, gpa, hpa, ent, | 3399 | va, gpa, hpa, ent, |
3398 | is_shadow_present_pte(ent)); | 3400 | is_shadow_present_pte(ent)); |
3399 | else if (ent == shadow_notrap_nonpresent_pte | 3401 | else if (ent == shadow_notrap_nonpresent_pte |
3400 | && !is_error_hpa(hpa)) | 3402 | && !is_error_hpa(hpa)) |
3401 | printk(KERN_ERR "audit: (%s) notrap shadow," | 3403 | printk(KERN_ERR "audit: (%s) notrap shadow," |
3402 | " valid guest gva %lx\n", audit_msg, va); | 3404 | " valid guest gva %lx\n", audit_msg, va); |
3403 | kvm_release_pfn_clean(pfn); | 3405 | kvm_release_pfn_clean(pfn); |
3404 | 3406 | ||
3405 | } | 3407 | } |
3406 | } | 3408 | } |
3407 | } | 3409 | } |
3408 | 3410 | ||
3409 | static void audit_mappings(struct kvm_vcpu *vcpu) | 3411 | static void audit_mappings(struct kvm_vcpu *vcpu) |
3410 | { | 3412 | { |
3411 | unsigned i; | 3413 | unsigned i; |
3412 | 3414 | ||
3413 | if (vcpu->arch.mmu.root_level == 4) | 3415 | if (vcpu->arch.mmu.root_level == 4) |
3414 | audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4); | 3416 | audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4); |
3415 | else | 3417 | else |
3416 | for (i = 0; i < 4; ++i) | 3418 | for (i = 0; i < 4; ++i) |
3417 | if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK) | 3419 | if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK) |
3418 | audit_mappings_page(vcpu, | 3420 | audit_mappings_page(vcpu, |
3419 | vcpu->arch.mmu.pae_root[i], | 3421 | vcpu->arch.mmu.pae_root[i], |
3420 | i << 30, | 3422 | i << 30, |
3421 | 2); | 3423 | 2); |
3422 | } | 3424 | } |
3423 | 3425 | ||
3424 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3426 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3425 | { | 3427 | { |
3426 | struct kvm *kvm = vcpu->kvm; | 3428 | struct kvm *kvm = vcpu->kvm; |
3427 | struct kvm_memslots *slots; | 3429 | struct kvm_memslots *slots; |
3428 | int nmaps = 0; | 3430 | int nmaps = 0; |
3429 | int i, j, k, idx; | 3431 | int i, j, k, idx; |
3430 | 3432 | ||
3431 | idx = srcu_read_lock(&kvm->srcu); | 3433 | idx = srcu_read_lock(&kvm->srcu); |
3432 | slots = kvm_memslots(kvm); | 3434 | slots = kvm_memslots(kvm); |
3433 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3435 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3434 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3436 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3435 | struct kvm_rmap_desc *d; | 3437 | struct kvm_rmap_desc *d; |
3436 | 3438 | ||
3437 | for (j = 0; j < m->npages; ++j) { | 3439 | for (j = 0; j < m->npages; ++j) { |
3438 | unsigned long *rmapp = &m->rmap[j]; | 3440 | unsigned long *rmapp = &m->rmap[j]; |
3439 | 3441 | ||
3440 | if (!*rmapp) | 3442 | if (!*rmapp) |
3441 | continue; | 3443 | continue; |
3442 | if (!(*rmapp & 1)) { | 3444 | if (!(*rmapp & 1)) { |
3443 | ++nmaps; | 3445 | ++nmaps; |
3444 | continue; | 3446 | continue; |
3445 | } | 3447 | } |
3446 | d = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 3448 | d = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
3447 | while (d) { | 3449 | while (d) { |
3448 | for (k = 0; k < RMAP_EXT; ++k) | 3450 | for (k = 0; k < RMAP_EXT; ++k) |
3449 | if (d->sptes[k]) | 3451 | if (d->sptes[k]) |
3450 | ++nmaps; | 3452 | ++nmaps; |
3451 | else | 3453 | else |
3452 | break; | 3454 | break; |
3453 | d = d->more; | 3455 | d = d->more; |
3454 | } | 3456 | } |
3455 | } | 3457 | } |
3456 | } | 3458 | } |
3457 | srcu_read_unlock(&kvm->srcu, idx); | 3459 | srcu_read_unlock(&kvm->srcu, idx); |
3458 | return nmaps; | 3460 | return nmaps; |
3459 | } | 3461 | } |
3460 | 3462 | ||
3461 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | 3463 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
3462 | { | 3464 | { |
3463 | unsigned long *rmapp; | 3465 | unsigned long *rmapp; |
3464 | struct kvm_mmu_page *rev_sp; | 3466 | struct kvm_mmu_page *rev_sp; |
3465 | gfn_t gfn; | 3467 | gfn_t gfn; |
3466 | 3468 | ||
3467 | if (is_writable_pte(*sptep)) { | 3469 | if (is_writable_pte(*sptep)) { |
3468 | rev_sp = page_header(__pa(sptep)); | 3470 | rev_sp = page_header(__pa(sptep)); |
3469 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); | 3471 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
3470 | 3472 | ||
3471 | if (!gfn_to_memslot(kvm, gfn)) { | 3473 | if (!gfn_to_memslot(kvm, gfn)) { |
3472 | if (!printk_ratelimit()) | 3474 | if (!printk_ratelimit()) |
3473 | return; | 3475 | return; |
3474 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3476 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
3475 | audit_msg, gfn); | 3477 | audit_msg, gfn); |
3476 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3478 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
3477 | audit_msg, (long int)(sptep - rev_sp->spt), | 3479 | audit_msg, (long int)(sptep - rev_sp->spt), |
3478 | rev_sp->gfn); | 3480 | rev_sp->gfn); |
3479 | dump_stack(); | 3481 | dump_stack(); |
3480 | return; | 3482 | return; |
3481 | } | 3483 | } |
3482 | 3484 | ||
3483 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); | 3485 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); |
3484 | if (!*rmapp) { | 3486 | if (!*rmapp) { |
3485 | if (!printk_ratelimit()) | 3487 | if (!printk_ratelimit()) |
3486 | return; | 3488 | return; |
3487 | printk(KERN_ERR "%s: no rmap for writable spte %llx\n", | 3489 | printk(KERN_ERR "%s: no rmap for writable spte %llx\n", |
3488 | audit_msg, *sptep); | 3490 | audit_msg, *sptep); |
3489 | dump_stack(); | 3491 | dump_stack(); |
3490 | } | 3492 | } |
3491 | } | 3493 | } |
3492 | 3494 | ||
3493 | } | 3495 | } |
3494 | 3496 | ||
3495 | void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu) | 3497 | void audit_writable_sptes_have_rmaps(struct kvm_vcpu *vcpu) |
3496 | { | 3498 | { |
3497 | mmu_spte_walk(vcpu, inspect_spte_has_rmap); | 3499 | mmu_spte_walk(vcpu, inspect_spte_has_rmap); |
3498 | } | 3500 | } |
3499 | 3501 | ||
3500 | static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | 3502 | static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) |
3501 | { | 3503 | { |
3502 | struct kvm_mmu_page *sp; | 3504 | struct kvm_mmu_page *sp; |
3503 | int i; | 3505 | int i; |
3504 | 3506 | ||
3505 | list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { | 3507 | list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { |
3506 | u64 *pt = sp->spt; | 3508 | u64 *pt = sp->spt; |
3507 | 3509 | ||
3508 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | 3510 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) |
3509 | continue; | 3511 | continue; |
3510 | 3512 | ||
3511 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 3513 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
3512 | u64 ent = pt[i]; | 3514 | u64 ent = pt[i]; |
3513 | 3515 | ||
3514 | if (!(ent & PT_PRESENT_MASK)) | 3516 | if (!(ent & PT_PRESENT_MASK)) |
3515 | continue; | 3517 | continue; |
3516 | if (!is_writable_pte(ent)) | 3518 | if (!is_writable_pte(ent)) |
3517 | continue; | 3519 | continue; |
3518 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); | 3520 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3519 | } | 3521 | } |
3520 | } | 3522 | } |
3521 | return; | 3523 | return; |
3522 | } | 3524 | } |
3523 | 3525 | ||
3524 | static void audit_rmap(struct kvm_vcpu *vcpu) | 3526 | static void audit_rmap(struct kvm_vcpu *vcpu) |
3525 | { | 3527 | { |
3526 | check_writable_mappings_rmap(vcpu); | 3528 | check_writable_mappings_rmap(vcpu); |
3527 | count_rmaps(vcpu); | 3529 | count_rmaps(vcpu); |
3528 | } | 3530 | } |
3529 | 3531 | ||
3530 | static void audit_write_protection(struct kvm_vcpu *vcpu) | 3532 | static void audit_write_protection(struct kvm_vcpu *vcpu) |
3531 | { | 3533 | { |
3532 | struct kvm_mmu_page *sp; | 3534 | struct kvm_mmu_page *sp; |
3533 | struct kvm_memory_slot *slot; | 3535 | struct kvm_memory_slot *slot; |
3534 | unsigned long *rmapp; | 3536 | unsigned long *rmapp; |
3535 | u64 *spte; | 3537 | u64 *spte; |
3536 | gfn_t gfn; | 3538 | gfn_t gfn; |
3537 | 3539 | ||
3538 | list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { | 3540 | list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { |
3539 | if (sp->role.direct) | 3541 | if (sp->role.direct) |
3540 | continue; | 3542 | continue; |
3541 | if (sp->unsync) | 3543 | if (sp->unsync) |
3542 | continue; | 3544 | continue; |
3543 | 3545 | ||
3544 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); | 3546 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); |
3545 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; | 3547 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; |
3546 | 3548 | ||
3547 | spte = rmap_next(vcpu->kvm, rmapp, NULL); | 3549 | spte = rmap_next(vcpu->kvm, rmapp, NULL); |
3548 | while (spte) { | 3550 | while (spte) { |
3549 | if (is_writable_pte(*spte)) | 3551 | if (is_writable_pte(*spte)) |
3550 | printk(KERN_ERR "%s: (%s) shadow page has " | 3552 | printk(KERN_ERR "%s: (%s) shadow page has " |
3551 | "writable mappings: gfn %lx role %x\n", | 3553 | "writable mappings: gfn %lx role %x\n", |
3552 | __func__, audit_msg, sp->gfn, | 3554 | __func__, audit_msg, sp->gfn, |
3553 | sp->role.word); | 3555 | sp->role.word); |
3554 | spte = rmap_next(vcpu->kvm, rmapp, spte); | 3556 | spte = rmap_next(vcpu->kvm, rmapp, spte); |
3555 | } | 3557 | } |
3556 | } | 3558 | } |
3557 | } | 3559 | } |
3558 | 3560 | ||
3559 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) | 3561 | static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) |
3560 | { | 3562 | { |
3561 | int olddbg = dbg; | 3563 | int olddbg = dbg; |
3562 | 3564 | ||
3563 | dbg = 0; | 3565 | dbg = 0; |
3564 | audit_msg = msg; | 3566 | audit_msg = msg; |
3565 | audit_rmap(vcpu); | 3567 | audit_rmap(vcpu); |
3566 | audit_write_protection(vcpu); | 3568 | audit_write_protection(vcpu); |
3567 | if (strcmp("pre pte write", audit_msg) != 0) | 3569 | if (strcmp("pre pte write", audit_msg) != 0) |
3568 | audit_mappings(vcpu); | 3570 | audit_mappings(vcpu); |
3569 | audit_writable_sptes_have_rmaps(vcpu); | 3571 | audit_writable_sptes_have_rmaps(vcpu); |
3570 | dbg = olddbg; | 3572 | dbg = olddbg; |
3571 | } | 3573 | } |
3572 | 3574 | ||
3573 | #endif | 3575 | #endif |
3574 | 3576 |
include/linux/kvm_host.h
1 | #ifndef __KVM_HOST_H | 1 | #ifndef __KVM_HOST_H |
2 | #define __KVM_HOST_H | 2 | #define __KVM_HOST_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * This work is licensed under the terms of the GNU GPL, version 2. See | 5 | * This work is licensed under the terms of the GNU GPL, version 2. See |
6 | * the COPYING file in the top-level directory. | 6 | * the COPYING file in the top-level directory. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/hardirq.h> | 10 | #include <linux/hardirq.h> |
11 | #include <linux/list.h> | 11 | #include <linux/list.h> |
12 | #include <linux/mutex.h> | 12 | #include <linux/mutex.h> |
13 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
14 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
18 | #include <linux/msi.h> | 18 | #include <linux/msi.h> |
19 | #include <asm/signal.h> | 19 | #include <asm/signal.h> |
20 | 20 | ||
21 | #include <linux/kvm.h> | 21 | #include <linux/kvm.h> |
22 | #include <linux/kvm_para.h> | 22 | #include <linux/kvm_para.h> |
23 | 23 | ||
24 | #include <linux/kvm_types.h> | 24 | #include <linux/kvm_types.h> |
25 | 25 | ||
26 | #include <asm/kvm_host.h> | 26 | #include <asm/kvm_host.h> |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * vcpu->requests bit members | 29 | * vcpu->requests bit members |
30 | */ | 30 | */ |
31 | #define KVM_REQ_TLB_FLUSH 0 | 31 | #define KVM_REQ_TLB_FLUSH 0 |
32 | #define KVM_REQ_MIGRATE_TIMER 1 | 32 | #define KVM_REQ_MIGRATE_TIMER 1 |
33 | #define KVM_REQ_REPORT_TPR_ACCESS 2 | 33 | #define KVM_REQ_REPORT_TPR_ACCESS 2 |
34 | #define KVM_REQ_MMU_RELOAD 3 | 34 | #define KVM_REQ_MMU_RELOAD 3 |
35 | #define KVM_REQ_TRIPLE_FAULT 4 | 35 | #define KVM_REQ_TRIPLE_FAULT 4 |
36 | #define KVM_REQ_PENDING_TIMER 5 | 36 | #define KVM_REQ_PENDING_TIMER 5 |
37 | #define KVM_REQ_UNHALT 6 | 37 | #define KVM_REQ_UNHALT 6 |
38 | #define KVM_REQ_MMU_SYNC 7 | 38 | #define KVM_REQ_MMU_SYNC 7 |
39 | #define KVM_REQ_KVMCLOCK_UPDATE 8 | 39 | #define KVM_REQ_KVMCLOCK_UPDATE 8 |
40 | #define KVM_REQ_KICK 9 | 40 | #define KVM_REQ_KICK 9 |
41 | #define KVM_REQ_DEACTIVATE_FPU 10 | 41 | #define KVM_REQ_DEACTIVATE_FPU 10 |
42 | 42 | ||
43 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 43 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
44 | 44 | ||
45 | struct kvm; | 45 | struct kvm; |
46 | struct kvm_vcpu; | 46 | struct kvm_vcpu; |
47 | extern struct kmem_cache *kvm_vcpu_cache; | 47 | extern struct kmem_cache *kvm_vcpu_cache; |
48 | 48 | ||
49 | /* | 49 | /* |
50 | * It would be nice to use something smarter than a linear search, TBD... | 50 | * It would be nice to use something smarter than a linear search, TBD... |
51 | * Thankfully we don't expect many devices to register (famous last words :), | 51 | * Thankfully we don't expect many devices to register (famous last words :), |
52 | * so until then it will suffice. At least it's abstracted so we can change | 52 | * so until then it will suffice. At least it's abstracted so we can change |
53 | * in one place. | 53 | * in one place. |
54 | */ | 54 | */ |
55 | struct kvm_io_bus { | 55 | struct kvm_io_bus { |
56 | int dev_count; | 56 | int dev_count; |
57 | #define NR_IOBUS_DEVS 200 | 57 | #define NR_IOBUS_DEVS 200 |
58 | struct kvm_io_device *devs[NR_IOBUS_DEVS]; | 58 | struct kvm_io_device *devs[NR_IOBUS_DEVS]; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | enum kvm_bus { | 61 | enum kvm_bus { |
62 | KVM_MMIO_BUS, | 62 | KVM_MMIO_BUS, |
63 | KVM_PIO_BUS, | 63 | KVM_PIO_BUS, |
64 | KVM_NR_BUSES | 64 | KVM_NR_BUSES |
65 | }; | 65 | }; |
66 | 66 | ||
67 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 67 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
68 | int len, const void *val); | 68 | int len, const void *val); |
69 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, | 69 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, |
70 | void *val); | 70 | void *val); |
71 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 71 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
72 | struct kvm_io_device *dev); | 72 | struct kvm_io_device *dev); |
73 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 73 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
74 | struct kvm_io_device *dev); | 74 | struct kvm_io_device *dev); |
75 | 75 | ||
76 | struct kvm_vcpu { | 76 | struct kvm_vcpu { |
77 | struct kvm *kvm; | 77 | struct kvm *kvm; |
78 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 78 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
79 | struct preempt_notifier preempt_notifier; | 79 | struct preempt_notifier preempt_notifier; |
80 | #endif | 80 | #endif |
81 | int vcpu_id; | 81 | int vcpu_id; |
82 | struct mutex mutex; | 82 | struct mutex mutex; |
83 | int cpu; | 83 | int cpu; |
84 | atomic_t guest_mode; | 84 | atomic_t guest_mode; |
85 | struct kvm_run *run; | 85 | struct kvm_run *run; |
86 | unsigned long requests; | 86 | unsigned long requests; |
87 | unsigned long guest_debug; | 87 | unsigned long guest_debug; |
88 | int srcu_idx; | 88 | int srcu_idx; |
89 | 89 | ||
90 | int fpu_active; | 90 | int fpu_active; |
91 | int guest_fpu_loaded, guest_xcr0_loaded; | 91 | int guest_fpu_loaded, guest_xcr0_loaded; |
92 | wait_queue_head_t wq; | 92 | wait_queue_head_t wq; |
93 | int sigset_active; | 93 | int sigset_active; |
94 | sigset_t sigset; | 94 | sigset_t sigset; |
95 | struct kvm_vcpu_stat stat; | 95 | struct kvm_vcpu_stat stat; |
96 | 96 | ||
97 | #ifdef CONFIG_HAS_IOMEM | 97 | #ifdef CONFIG_HAS_IOMEM |
98 | int mmio_needed; | 98 | int mmio_needed; |
99 | int mmio_read_completed; | 99 | int mmio_read_completed; |
100 | int mmio_is_write; | 100 | int mmio_is_write; |
101 | int mmio_size; | 101 | int mmio_size; |
102 | unsigned char mmio_data[8]; | 102 | unsigned char mmio_data[8]; |
103 | gpa_t mmio_phys_addr; | 103 | gpa_t mmio_phys_addr; |
104 | #endif | 104 | #endif |
105 | 105 | ||
106 | struct kvm_vcpu_arch arch; | 106 | struct kvm_vcpu_arch arch; |
107 | }; | 107 | }; |
108 | 108 | ||
109 | /* | 109 | /* |
110 | * Some of the bitops functions do not support overly long bitmaps. | 110 | * Some of the bitops functions do not support overly long bitmaps. |
111 | * This number must be chosen so that it does not exceed those limits. | 111 | * This number must be chosen so that it does not exceed those limits. |
112 | */ | 112 | */ |
113 | #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) | 113 | #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) |
114 | 114 | ||
115 | struct kvm_memory_slot { | 115 | struct kvm_memory_slot { |
116 | gfn_t base_gfn; | 116 | gfn_t base_gfn; |
117 | unsigned long npages; | 117 | unsigned long npages; |
118 | unsigned long flags; | 118 | unsigned long flags; |
119 | unsigned long *rmap; | 119 | unsigned long *rmap; |
120 | unsigned long *dirty_bitmap; | 120 | unsigned long *dirty_bitmap; |
121 | struct { | 121 | struct { |
122 | unsigned long rmap_pde; | 122 | unsigned long rmap_pde; |
123 | int write_count; | 123 | int write_count; |
124 | } *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 124 | } *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
125 | unsigned long userspace_addr; | 125 | unsigned long userspace_addr; |
126 | int user_alloc; | 126 | int user_alloc; |
127 | int id; | 127 | int id; |
128 | }; | 128 | }; |
129 | 129 | ||
130 | static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) | 130 | static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) |
131 | { | 131 | { |
132 | return ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 132 | return ALIGN(memslot->npages, BITS_PER_LONG) / 8; |
133 | } | 133 | } |
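A quick worked example of the helper above: with BITS_PER_LONG == 64 (a 64-bit host), a slot of 1000 pages is rounded up to 1024 bits, so kvm_dirty_bitmap_bytes() returns 1024 / 8 = 128 bytes for the slot's dirty bitmap.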
134 | 134 | ||
135 | struct kvm_kernel_irq_routing_entry { | 135 | struct kvm_kernel_irq_routing_entry { |
136 | u32 gsi; | 136 | u32 gsi; |
137 | u32 type; | 137 | u32 type; |
138 | int (*set)(struct kvm_kernel_irq_routing_entry *e, | 138 | int (*set)(struct kvm_kernel_irq_routing_entry *e, |
139 | struct kvm *kvm, int irq_source_id, int level); | 139 | struct kvm *kvm, int irq_source_id, int level); |
140 | union { | 140 | union { |
141 | struct { | 141 | struct { |
142 | unsigned irqchip; | 142 | unsigned irqchip; |
143 | unsigned pin; | 143 | unsigned pin; |
144 | } irqchip; | 144 | } irqchip; |
145 | struct msi_msg msi; | 145 | struct msi_msg msi; |
146 | }; | 146 | }; |
147 | struct hlist_node link; | 147 | struct hlist_node link; |
148 | }; | 148 | }; |
149 | 149 | ||
150 | #ifdef __KVM_HAVE_IOAPIC | 150 | #ifdef __KVM_HAVE_IOAPIC |
151 | 151 | ||
152 | struct kvm_irq_routing_table { | 152 | struct kvm_irq_routing_table { |
153 | int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; | 153 | int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; |
154 | struct kvm_kernel_irq_routing_entry *rt_entries; | 154 | struct kvm_kernel_irq_routing_entry *rt_entries; |
155 | u32 nr_rt_entries; | 155 | u32 nr_rt_entries; |
156 | /* | 156 | /* |
157 | * Array indexed by gsi. Each entry contains a list of irq chips | 157 | * Array indexed by gsi. Each entry contains a list of irq chips |
158 | * the gsi is connected to. | 158 | * the gsi is connected to. |
159 | */ | 159 | */ |
160 | struct hlist_head map[0]; | 160 | struct hlist_head map[0]; |
161 | }; | 161 | }; |
162 | 162 | ||
163 | #else | 163 | #else |
164 | 164 | ||
165 | struct kvm_irq_routing_table {}; | 165 | struct kvm_irq_routing_table {}; |
166 | 166 | ||
167 | #endif | 167 | #endif |
168 | 168 | ||
169 | struct kvm_memslots { | 169 | struct kvm_memslots { |
170 | int nmemslots; | 170 | int nmemslots; |
171 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + | 171 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + |
172 | KVM_PRIVATE_MEM_SLOTS]; | 172 | KVM_PRIVATE_MEM_SLOTS]; |
173 | }; | 173 | }; |
174 | 174 | ||
175 | struct kvm { | 175 | struct kvm { |
176 | spinlock_t mmu_lock; | 176 | spinlock_t mmu_lock; |
177 | raw_spinlock_t requests_lock; | 177 | raw_spinlock_t requests_lock; |
178 | struct mutex slots_lock; | 178 | struct mutex slots_lock; |
179 | struct mm_struct *mm; /* userspace tied to this vm */ | 179 | struct mm_struct *mm; /* userspace tied to this vm */ |
180 | struct kvm_memslots *memslots; | 180 | struct kvm_memslots *memslots; |
181 | struct srcu_struct srcu; | 181 | struct srcu_struct srcu; |
182 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 182 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
183 | u32 bsp_vcpu_id; | 183 | u32 bsp_vcpu_id; |
184 | struct kvm_vcpu *bsp_vcpu; | 184 | struct kvm_vcpu *bsp_vcpu; |
185 | #endif | 185 | #endif |
186 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; | 186 | struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; |
187 | atomic_t online_vcpus; | 187 | atomic_t online_vcpus; |
188 | struct list_head vm_list; | 188 | struct list_head vm_list; |
189 | struct mutex lock; | 189 | struct mutex lock; |
190 | struct kvm_io_bus *buses[KVM_NR_BUSES]; | 190 | struct kvm_io_bus *buses[KVM_NR_BUSES]; |
191 | #ifdef CONFIG_HAVE_KVM_EVENTFD | 191 | #ifdef CONFIG_HAVE_KVM_EVENTFD |
192 | struct { | 192 | struct { |
193 | spinlock_t lock; | 193 | spinlock_t lock; |
194 | struct list_head items; | 194 | struct list_head items; |
195 | } irqfds; | 195 | } irqfds; |
196 | struct list_head ioeventfds; | 196 | struct list_head ioeventfds; |
197 | #endif | 197 | #endif |
198 | struct kvm_vm_stat stat; | 198 | struct kvm_vm_stat stat; |
199 | struct kvm_arch arch; | 199 | struct kvm_arch arch; |
200 | atomic_t users_count; | 200 | atomic_t users_count; |
201 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 201 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
202 | struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; | 202 | struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; |
203 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; | 203 | struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; |
204 | #endif | 204 | #endif |
205 | 205 | ||
206 | struct mutex irq_lock; | 206 | struct mutex irq_lock; |
207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 207 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
208 | struct kvm_irq_routing_table *irq_routing; | 208 | struct kvm_irq_routing_table *irq_routing; |
209 | struct hlist_head mask_notifier_list; | 209 | struct hlist_head mask_notifier_list; |
210 | struct hlist_head irq_ack_notifier_list; | 210 | struct hlist_head irq_ack_notifier_list; |
211 | #endif | 211 | #endif |
212 | 212 | ||
213 | #ifdef KVM_ARCH_WANT_MMU_NOTIFIER | 213 | #ifdef KVM_ARCH_WANT_MMU_NOTIFIER |
214 | struct mmu_notifier mmu_notifier; | 214 | struct mmu_notifier mmu_notifier; |
215 | unsigned long mmu_notifier_seq; | 215 | unsigned long mmu_notifier_seq; |
216 | long mmu_notifier_count; | 216 | long mmu_notifier_count; |
217 | #endif | 217 | #endif |
218 | }; | 218 | }; |
219 | 219 | ||
220 | /* The guest did something we don't support. */ | 220 | /* The guest did something we don't support. */ |
221 | #define pr_unimpl(vcpu, fmt, ...) \ | 221 | #define pr_unimpl(vcpu, fmt, ...) \ |
222 | do { \ | 222 | do { \ |
223 | if (printk_ratelimit()) \ | 223 | if (printk_ratelimit()) \ |
224 | printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ | 224 | printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ |
225 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ | 225 | current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ |
226 | } while (0) | 226 | } while (0) |
227 | 227 | ||
228 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) | 228 | #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) |
229 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) | 229 | #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) |
230 | 230 | ||
231 | static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) | 231 | static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) |
232 | { | 232 | { |
233 | smp_rmb(); | 233 | smp_rmb(); |
234 | return kvm->vcpus[i]; | 234 | return kvm->vcpus[i]; |
235 | } | 235 | } |
236 | 236 | ||
237 | #define kvm_for_each_vcpu(idx, vcpup, kvm) \ | 237 | #define kvm_for_each_vcpu(idx, vcpup, kvm) \ |
238 | for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ | 238 | for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ |
239 | idx < atomic_read(&kvm->online_vcpus) && vcpup; \ | 239 | idx < atomic_read(&kvm->online_vcpus) && vcpup; \ |
240 | vcpup = kvm_get_vcpu(kvm, ++idx)) | 240 | vcpup = kvm_get_vcpu(kvm, ++idx)) |
241 | 241 | ||
242 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); | 242 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); |
243 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | 243 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); |
244 | 244 | ||
245 | void vcpu_load(struct kvm_vcpu *vcpu); | 245 | void vcpu_load(struct kvm_vcpu *vcpu); |
246 | void vcpu_put(struct kvm_vcpu *vcpu); | 246 | void vcpu_put(struct kvm_vcpu *vcpu); |
247 | 247 | ||
248 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | 248 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
249 | struct module *module); | 249 | struct module *module); |
250 | void kvm_exit(void); | 250 | void kvm_exit(void); |
251 | 251 | ||
252 | void kvm_get_kvm(struct kvm *kvm); | 252 | void kvm_get_kvm(struct kvm *kvm); |
253 | void kvm_put_kvm(struct kvm *kvm); | 253 | void kvm_put_kvm(struct kvm *kvm); |
254 | 254 | ||
255 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | 255 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) |
256 | { | 256 | { |
257 | return rcu_dereference_check(kvm->memslots, | 257 | return rcu_dereference_check(kvm->memslots, |
258 | srcu_read_lock_held(&kvm->srcu) | 258 | srcu_read_lock_held(&kvm->srcu) |
259 | || lockdep_is_held(&kvm->slots_lock)); | 259 | || lockdep_is_held(&kvm->slots_lock)); |
260 | } | 260 | } |
261 | 261 | ||
262 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | 262 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) |
263 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | 263 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) |
264 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | 264 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } |
265 | 265 | ||
266 | extern struct page *bad_page; | 266 | extern struct page *bad_page; |
267 | extern pfn_t bad_pfn; | 267 | extern pfn_t bad_pfn; |
268 | 268 | ||
269 | int is_error_page(struct page *page); | 269 | int is_error_page(struct page *page); |
270 | int is_error_pfn(pfn_t pfn); | 270 | int is_error_pfn(pfn_t pfn); |
271 | int is_hwpoison_pfn(pfn_t pfn); | 271 | int is_hwpoison_pfn(pfn_t pfn); |
272 | int is_fault_pfn(pfn_t pfn); | ||
272 | int kvm_is_error_hva(unsigned long addr); | 273 | int kvm_is_error_hva(unsigned long addr); |
273 | int kvm_set_memory_region(struct kvm *kvm, | 274 | int kvm_set_memory_region(struct kvm *kvm, |
274 | struct kvm_userspace_memory_region *mem, | 275 | struct kvm_userspace_memory_region *mem, |
275 | int user_alloc); | 276 | int user_alloc); |
276 | int __kvm_set_memory_region(struct kvm *kvm, | 277 | int __kvm_set_memory_region(struct kvm *kvm, |
277 | struct kvm_userspace_memory_region *mem, | 278 | struct kvm_userspace_memory_region *mem, |
278 | int user_alloc); | 279 | int user_alloc); |
279 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 280 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
280 | struct kvm_memory_slot *memslot, | 281 | struct kvm_memory_slot *memslot, |
281 | struct kvm_memory_slot old, | 282 | struct kvm_memory_slot old, |
282 | struct kvm_userspace_memory_region *mem, | 283 | struct kvm_userspace_memory_region *mem, |
283 | int user_alloc); | 284 | int user_alloc); |
284 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 285 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
285 | struct kvm_userspace_memory_region *mem, | 286 | struct kvm_userspace_memory_region *mem, |
286 | struct kvm_memory_slot old, | 287 | struct kvm_memory_slot old, |
287 | int user_alloc); | 288 | int user_alloc); |
288 | void kvm_disable_largepages(void); | 289 | void kvm_disable_largepages(void); |
289 | void kvm_arch_flush_shadow(struct kvm *kvm); | 290 | void kvm_arch_flush_shadow(struct kvm *kvm); |
290 | 291 | ||
291 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 292 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
292 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | 293 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); |
293 | void kvm_release_page_clean(struct page *page); | 294 | void kvm_release_page_clean(struct page *page); |
294 | void kvm_release_page_dirty(struct page *page); | 295 | void kvm_release_page_dirty(struct page *page); |
295 | void kvm_set_page_dirty(struct page *page); | 296 | void kvm_set_page_dirty(struct page *page); |
296 | void kvm_set_page_accessed(struct page *page); | 297 | void kvm_set_page_accessed(struct page *page); |
297 | 298 | ||
298 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); | 299 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); |
299 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 300 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
300 | struct kvm_memory_slot *slot, gfn_t gfn); | 301 | struct kvm_memory_slot *slot, gfn_t gfn); |
301 | int memslot_id(struct kvm *kvm, gfn_t gfn); | 302 | int memslot_id(struct kvm *kvm, gfn_t gfn); |
302 | void kvm_release_pfn_dirty(pfn_t); | 303 | void kvm_release_pfn_dirty(pfn_t); |
303 | void kvm_release_pfn_clean(pfn_t pfn); | 304 | void kvm_release_pfn_clean(pfn_t pfn); |
304 | void kvm_set_pfn_dirty(pfn_t pfn); | 305 | void kvm_set_pfn_dirty(pfn_t pfn); |
305 | void kvm_set_pfn_accessed(pfn_t pfn); | 306 | void kvm_set_pfn_accessed(pfn_t pfn); |
306 | void kvm_get_pfn(pfn_t pfn); | 307 | void kvm_get_pfn(pfn_t pfn); |
307 | 308 | ||
308 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | 309 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, |
309 | int len); | 310 | int len); |
310 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | 311 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, |
311 | unsigned long len); | 312 | unsigned long len); |
312 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); | 313 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); |
313 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, | 314 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, |
314 | int offset, int len); | 315 | int offset, int len); |
315 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | 316 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, |
316 | unsigned long len); | 317 | unsigned long len); |
317 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); | 318 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); |
318 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); | 319 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); |
319 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); | 320 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); |
320 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); | 321 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); |
321 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); | 322 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); |
322 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); | 323 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); |
323 | 324 | ||
324 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); | 325 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); |
325 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); | 326 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); |
326 | void kvm_resched(struct kvm_vcpu *vcpu); | 327 | void kvm_resched(struct kvm_vcpu *vcpu); |
327 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | 328 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); |
328 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | 329 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); |
329 | void kvm_flush_remote_tlbs(struct kvm *kvm); | 330 | void kvm_flush_remote_tlbs(struct kvm *kvm); |
330 | void kvm_reload_remote_mmus(struct kvm *kvm); | 331 | void kvm_reload_remote_mmus(struct kvm *kvm); |
331 | 332 | ||
332 | long kvm_arch_dev_ioctl(struct file *filp, | 333 | long kvm_arch_dev_ioctl(struct file *filp, |
333 | unsigned int ioctl, unsigned long arg); | 334 | unsigned int ioctl, unsigned long arg); |
334 | long kvm_arch_vcpu_ioctl(struct file *filp, | 335 | long kvm_arch_vcpu_ioctl(struct file *filp, |
335 | unsigned int ioctl, unsigned long arg); | 336 | unsigned int ioctl, unsigned long arg); |
336 | 337 | ||
337 | int kvm_dev_ioctl_check_extension(long ext); | 338 | int kvm_dev_ioctl_check_extension(long ext); |
338 | 339 | ||
339 | int kvm_get_dirty_log(struct kvm *kvm, | 340 | int kvm_get_dirty_log(struct kvm *kvm, |
340 | struct kvm_dirty_log *log, int *is_dirty); | 341 | struct kvm_dirty_log *log, int *is_dirty); |
341 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 342 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
342 | struct kvm_dirty_log *log); | 343 | struct kvm_dirty_log *log); |
343 | 344 | ||
344 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | 345 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, |
345 | struct | 346 | struct |
346 | kvm_userspace_memory_region *mem, | 347 | kvm_userspace_memory_region *mem, |
347 | int user_alloc); | 348 | int user_alloc); |
348 | long kvm_arch_vm_ioctl(struct file *filp, | 349 | long kvm_arch_vm_ioctl(struct file *filp, |
349 | unsigned int ioctl, unsigned long arg); | 350 | unsigned int ioctl, unsigned long arg); |
350 | 351 | ||
351 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); | 352 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); |
352 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); | 353 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); |
353 | 354 | ||
354 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 355 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
355 | struct kvm_translation *tr); | 356 | struct kvm_translation *tr); |
356 | 357 | ||
357 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); | 358 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); |
358 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); | 359 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); |
359 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 360 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
360 | struct kvm_sregs *sregs); | 361 | struct kvm_sregs *sregs); |
361 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 362 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
362 | struct kvm_sregs *sregs); | 363 | struct kvm_sregs *sregs); |
363 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 364 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
364 | struct kvm_mp_state *mp_state); | 365 | struct kvm_mp_state *mp_state); |
365 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 366 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
366 | struct kvm_mp_state *mp_state); | 367 | struct kvm_mp_state *mp_state); |
367 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 368 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
368 | struct kvm_guest_debug *dbg); | 369 | struct kvm_guest_debug *dbg); |
369 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); | 370 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); |
370 | 371 | ||
371 | int kvm_arch_init(void *opaque); | 372 | int kvm_arch_init(void *opaque); |
372 | void kvm_arch_exit(void); | 373 | void kvm_arch_exit(void); |
373 | 374 | ||
374 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); | 375 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); |
375 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); | 376 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); |
376 | 377 | ||
377 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); | 378 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); |
378 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); | 379 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); |
379 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); | 380 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); |
380 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); | 381 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); |
381 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); | 382 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); |
382 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); | 383 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); |
383 | 384 | ||
384 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); | 385 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); |
385 | int kvm_arch_hardware_enable(void *garbage); | 386 | int kvm_arch_hardware_enable(void *garbage); |
386 | void kvm_arch_hardware_disable(void *garbage); | 387 | void kvm_arch_hardware_disable(void *garbage); |
387 | int kvm_arch_hardware_setup(void); | 388 | int kvm_arch_hardware_setup(void); |
388 | void kvm_arch_hardware_unsetup(void); | 389 | void kvm_arch_hardware_unsetup(void); |
389 | void kvm_arch_check_processor_compat(void *rtn); | 390 | void kvm_arch_check_processor_compat(void *rtn); |
390 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); | 391 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); |
391 | 392 | ||
392 | void kvm_free_physmem(struct kvm *kvm); | 393 | void kvm_free_physmem(struct kvm *kvm); |
393 | 394 | ||
394 | struct kvm *kvm_arch_create_vm(void); | 395 | struct kvm *kvm_arch_create_vm(void); |
395 | void kvm_arch_destroy_vm(struct kvm *kvm); | 396 | void kvm_arch_destroy_vm(struct kvm *kvm); |
396 | void kvm_free_all_assigned_devices(struct kvm *kvm); | 397 | void kvm_free_all_assigned_devices(struct kvm *kvm); |
397 | void kvm_arch_sync_events(struct kvm *kvm); | 398 | void kvm_arch_sync_events(struct kvm *kvm); |
398 | 399 | ||
399 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); | 400 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); |
400 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 401 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
401 | 402 | ||
402 | int kvm_is_mmio_pfn(pfn_t pfn); | 403 | int kvm_is_mmio_pfn(pfn_t pfn); |
403 | 404 | ||
404 | struct kvm_irq_ack_notifier { | 405 | struct kvm_irq_ack_notifier { |
405 | struct hlist_node link; | 406 | struct hlist_node link; |
406 | unsigned gsi; | 407 | unsigned gsi; |
407 | void (*irq_acked)(struct kvm_irq_ack_notifier *kian); | 408 | void (*irq_acked)(struct kvm_irq_ack_notifier *kian); |
408 | }; | 409 | }; |
409 | 410 | ||
410 | #define KVM_ASSIGNED_MSIX_PENDING 0x1 | 411 | #define KVM_ASSIGNED_MSIX_PENDING 0x1 |
411 | struct kvm_guest_msix_entry { | 412 | struct kvm_guest_msix_entry { |
412 | u32 vector; | 413 | u32 vector; |
413 | u16 entry; | 414 | u16 entry; |
414 | u16 flags; | 415 | u16 flags; |
415 | }; | 416 | }; |
416 | 417 | ||
417 | struct kvm_assigned_dev_kernel { | 418 | struct kvm_assigned_dev_kernel { |
418 | struct kvm_irq_ack_notifier ack_notifier; | 419 | struct kvm_irq_ack_notifier ack_notifier; |
419 | struct work_struct interrupt_work; | 420 | struct work_struct interrupt_work; |
420 | struct list_head list; | 421 | struct list_head list; |
421 | int assigned_dev_id; | 422 | int assigned_dev_id; |
422 | int host_segnr; | 423 | int host_segnr; |
423 | int host_busnr; | 424 | int host_busnr; |
424 | int host_devfn; | 425 | int host_devfn; |
425 | unsigned int entries_nr; | 426 | unsigned int entries_nr; |
426 | int host_irq; | 427 | int host_irq; |
427 | bool host_irq_disabled; | 428 | bool host_irq_disabled; |
428 | struct msix_entry *host_msix_entries; | 429 | struct msix_entry *host_msix_entries; |
429 | int guest_irq; | 430 | int guest_irq; |
430 | struct kvm_guest_msix_entry *guest_msix_entries; | 431 | struct kvm_guest_msix_entry *guest_msix_entries; |
431 | unsigned long irq_requested_type; | 432 | unsigned long irq_requested_type; |
432 | int irq_source_id; | 433 | int irq_source_id; |
433 | int flags; | 434 | int flags; |
434 | struct pci_dev *dev; | 435 | struct pci_dev *dev; |
435 | struct kvm *kvm; | 436 | struct kvm *kvm; |
436 | spinlock_t assigned_dev_lock; | 437 | spinlock_t assigned_dev_lock; |
437 | }; | 438 | }; |
438 | 439 | ||
439 | struct kvm_irq_mask_notifier { | 440 | struct kvm_irq_mask_notifier { |
440 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); | 441 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); |
441 | int irq; | 442 | int irq; |
442 | struct hlist_node link; | 443 | struct hlist_node link; |
443 | }; | 444 | }; |
444 | 445 | ||
445 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | 446 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, |
446 | struct kvm_irq_mask_notifier *kimn); | 447 | struct kvm_irq_mask_notifier *kimn); |
447 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | 448 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, |
448 | struct kvm_irq_mask_notifier *kimn); | 449 | struct kvm_irq_mask_notifier *kimn); |
449 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); | 450 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); |
450 | 451 | ||
451 | #ifdef __KVM_HAVE_IOAPIC | 452 | #ifdef __KVM_HAVE_IOAPIC |
452 | void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, | 453 | void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, |
453 | union kvm_ioapic_redirect_entry *entry, | 454 | union kvm_ioapic_redirect_entry *entry, |
454 | unsigned long *deliver_bitmask); | 455 | unsigned long *deliver_bitmask); |
455 | #endif | 456 | #endif |
456 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); | 457 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); |
457 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); | 458 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); |
458 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 459 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
459 | struct kvm_irq_ack_notifier *kian); | 460 | struct kvm_irq_ack_notifier *kian); |
460 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | 461 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, |
461 | struct kvm_irq_ack_notifier *kian); | 462 | struct kvm_irq_ack_notifier *kian); |
462 | int kvm_request_irq_source_id(struct kvm *kvm); | 463 | int kvm_request_irq_source_id(struct kvm *kvm); |
463 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); | 464 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); |
464 | 465 | ||
465 | /* For vcpu->arch.iommu_flags */ | 466 | /* For vcpu->arch.iommu_flags */ |
466 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 | 467 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 |
467 | 468 | ||
468 | #ifdef CONFIG_IOMMU_API | 469 | #ifdef CONFIG_IOMMU_API |
469 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); | 470 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); |
470 | int kvm_iommu_map_guest(struct kvm *kvm); | 471 | int kvm_iommu_map_guest(struct kvm *kvm); |
471 | int kvm_iommu_unmap_guest(struct kvm *kvm); | 472 | int kvm_iommu_unmap_guest(struct kvm *kvm); |
472 | int kvm_assign_device(struct kvm *kvm, | 473 | int kvm_assign_device(struct kvm *kvm, |
473 | struct kvm_assigned_dev_kernel *assigned_dev); | 474 | struct kvm_assigned_dev_kernel *assigned_dev); |
474 | int kvm_deassign_device(struct kvm *kvm, | 475 | int kvm_deassign_device(struct kvm *kvm, |
475 | struct kvm_assigned_dev_kernel *assigned_dev); | 476 | struct kvm_assigned_dev_kernel *assigned_dev); |
476 | #else /* CONFIG_IOMMU_API */ | 477 | #else /* CONFIG_IOMMU_API */ |
477 | static inline int kvm_iommu_map_pages(struct kvm *kvm, | 478 | static inline int kvm_iommu_map_pages(struct kvm *kvm, |
478 | gfn_t base_gfn, | 479 | gfn_t base_gfn, |
479 | unsigned long npages) | 480 | unsigned long npages) |
480 | { | 481 | { |
481 | return 0; | 482 | return 0; |
482 | } | 483 | } |
483 | 484 | ||
484 | static inline int kvm_iommu_map_guest(struct kvm *kvm) | 485 | static inline int kvm_iommu_map_guest(struct kvm *kvm) |
485 | { | 486 | { |
486 | return -ENODEV; | 487 | return -ENODEV; |
487 | } | 488 | } |
488 | 489 | ||
489 | static inline int kvm_iommu_unmap_guest(struct kvm *kvm) | 490 | static inline int kvm_iommu_unmap_guest(struct kvm *kvm) |
490 | { | 491 | { |
491 | return 0; | 492 | return 0; |
492 | } | 493 | } |
493 | 494 | ||
494 | static inline int kvm_assign_device(struct kvm *kvm, | 495 | static inline int kvm_assign_device(struct kvm *kvm, |
495 | struct kvm_assigned_dev_kernel *assigned_dev) | 496 | struct kvm_assigned_dev_kernel *assigned_dev) |
496 | { | 497 | { |
497 | return 0; | 498 | return 0; |
498 | } | 499 | } |
499 | 500 | ||
500 | static inline int kvm_deassign_device(struct kvm *kvm, | 501 | static inline int kvm_deassign_device(struct kvm *kvm, |
501 | struct kvm_assigned_dev_kernel *assigned_dev) | 502 | struct kvm_assigned_dev_kernel *assigned_dev) |
502 | { | 503 | { |
503 | return 0; | 504 | return 0; |
504 | } | 505 | } |
505 | #endif /* CONFIG_IOMMU_API */ | 506 | #endif /* CONFIG_IOMMU_API */ |
506 | 507 | ||
507 | static inline void kvm_guest_enter(void) | 508 | static inline void kvm_guest_enter(void) |
508 | { | 509 | { |
509 | account_system_vtime(current); | 510 | account_system_vtime(current); |
510 | current->flags |= PF_VCPU; | 511 | current->flags |= PF_VCPU; |
511 | } | 512 | } |
512 | 513 | ||
513 | static inline void kvm_guest_exit(void) | 514 | static inline void kvm_guest_exit(void) |
514 | { | 515 | { |
515 | account_system_vtime(current); | 516 | account_system_vtime(current); |
516 | current->flags &= ~PF_VCPU; | 517 | current->flags &= ~PF_VCPU; |
517 | } | 518 | } |
518 | 519 | ||
519 | static inline gpa_t gfn_to_gpa(gfn_t gfn) | 520 | static inline gpa_t gfn_to_gpa(gfn_t gfn) |
520 | { | 521 | { |
521 | return (gpa_t)gfn << PAGE_SHIFT; | 522 | return (gpa_t)gfn << PAGE_SHIFT; |
522 | } | 523 | } |
523 | 524 | ||
524 | static inline hpa_t pfn_to_hpa(pfn_t pfn) | 525 | static inline hpa_t pfn_to_hpa(pfn_t pfn) |
525 | { | 526 | { |
526 | return (hpa_t)pfn << PAGE_SHIFT; | 527 | return (hpa_t)pfn << PAGE_SHIFT; |
527 | } | 528 | } |
528 | 529 | ||
529 | static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) | 530 | static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) |
530 | { | 531 | { |
531 | set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); | 532 | set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); |
532 | } | 533 | } |
533 | 534 | ||
534 | enum kvm_stat_kind { | 535 | enum kvm_stat_kind { |
535 | KVM_STAT_VM, | 536 | KVM_STAT_VM, |
536 | KVM_STAT_VCPU, | 537 | KVM_STAT_VCPU, |
537 | }; | 538 | }; |
538 | 539 | ||
539 | struct kvm_stats_debugfs_item { | 540 | struct kvm_stats_debugfs_item { |
540 | const char *name; | 541 | const char *name; |
541 | int offset; | 542 | int offset; |
542 | enum kvm_stat_kind kind; | 543 | enum kvm_stat_kind kind; |
543 | struct dentry *dentry; | 544 | struct dentry *dentry; |
544 | }; | 545 | }; |
545 | extern struct kvm_stats_debugfs_item debugfs_entries[]; | 546 | extern struct kvm_stats_debugfs_item debugfs_entries[]; |
546 | extern struct dentry *kvm_debugfs_dir; | 547 | extern struct dentry *kvm_debugfs_dir; |
547 | 548 | ||
548 | #ifdef KVM_ARCH_WANT_MMU_NOTIFIER | 549 | #ifdef KVM_ARCH_WANT_MMU_NOTIFIER |
549 | static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) | 550 | static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) |
550 | { | 551 | { |
551 | if (unlikely(vcpu->kvm->mmu_notifier_count)) | 552 | if (unlikely(vcpu->kvm->mmu_notifier_count)) |
552 | return 1; | 553 | return 1; |
553 | /* | 554 | /* |
554 | * Both reads happen under the mmu_lock and both values are | 555 | * Both reads happen under the mmu_lock and both values are |
555 | * modified under mmu_lock, so there's no need for smp_rmb() | 556 | * modified under mmu_lock, so there's no need for smp_rmb() |
556 | * here in between, otherwise mmu_notifier_count should be | 557 | * here in between, otherwise mmu_notifier_count should be |
557 | * read before mmu_notifier_seq, see | 558 | * read before mmu_notifier_seq, see |
558 | * mmu_notifier_invalidate_range_end write side. | 559 | * mmu_notifier_invalidate_range_end write side. |
559 | */ | 560 | */ |
560 | if (vcpu->kvm->mmu_notifier_seq != mmu_seq) | 561 | if (vcpu->kvm->mmu_notifier_seq != mmu_seq) |
561 | return 1; | 562 | return 1; |
562 | return 0; | 563 | return 0; |
563 | } | 564 | } |
564 | #endif | 565 | #endif |
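The comment inside mmu_notifier_retry() above describes the ordering contract with the MMU notifier callbacks in kvm_main.c. Below is a minimal sketch of the expected caller pattern in an arch page-fault path; the function name fault_handler and the spte-mapping step are hypothetical, only the sequence (sample mmu_notifier_seq, look up the pfn outside mmu_lock, re-check under mmu_lock) is what the helper assumes.

/* Hypothetical caller sketch, assuming <linux/kvm_host.h> context. */
static int fault_handler(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	unsigned long mmu_seq;
	pfn_t pfn;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, gfn);	/* may sleep; mmu_lock not held */

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq)) {
		/* an invalidate ran meanwhile; drop the page and let the fault retry */
		spin_unlock(&vcpu->kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return 0;
	}
	/* ...establish the spte for pfn here... */
	spin_unlock(&vcpu->kvm->mmu_lock);
	return 0;
}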
565 | 566 | ||
566 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 567 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
567 | 568 | ||
568 | #define KVM_MAX_IRQ_ROUTES 1024 | 569 | #define KVM_MAX_IRQ_ROUTES 1024 |
569 | 570 | ||
570 | int kvm_setup_default_irq_routing(struct kvm *kvm); | 571 | int kvm_setup_default_irq_routing(struct kvm *kvm); |
571 | int kvm_set_irq_routing(struct kvm *kvm, | 572 | int kvm_set_irq_routing(struct kvm *kvm, |
572 | const struct kvm_irq_routing_entry *entries, | 573 | const struct kvm_irq_routing_entry *entries, |
573 | unsigned nr, | 574 | unsigned nr, |
574 | unsigned flags); | 575 | unsigned flags); |
575 | void kvm_free_irq_routing(struct kvm *kvm); | 576 | void kvm_free_irq_routing(struct kvm *kvm); |
576 | 577 | ||
577 | #else | 578 | #else |
578 | 579 | ||
579 | static inline void kvm_free_irq_routing(struct kvm *kvm) {} | 580 | static inline void kvm_free_irq_routing(struct kvm *kvm) {} |
580 | 581 | ||
581 | #endif | 582 | #endif |
582 | 583 | ||
583 | #ifdef CONFIG_HAVE_KVM_EVENTFD | 584 | #ifdef CONFIG_HAVE_KVM_EVENTFD |
584 | 585 | ||
585 | void kvm_eventfd_init(struct kvm *kvm); | 586 | void kvm_eventfd_init(struct kvm *kvm); |
586 | int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); | 587 | int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); |
587 | void kvm_irqfd_release(struct kvm *kvm); | 588 | void kvm_irqfd_release(struct kvm *kvm); |
588 | int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); | 589 | int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); |
589 | 590 | ||
590 | #else | 591 | #else |
591 | 592 | ||
592 | static inline void kvm_eventfd_init(struct kvm *kvm) {} | 593 | static inline void kvm_eventfd_init(struct kvm *kvm) {} |
593 | static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) | 594 | static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) |
594 | { | 595 | { |
595 | return -EINVAL; | 596 | return -EINVAL; |
596 | } | 597 | } |
597 | 598 | ||
598 | static inline void kvm_irqfd_release(struct kvm *kvm) {} | 599 | static inline void kvm_irqfd_release(struct kvm *kvm) {} |
599 | static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 600 | static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
600 | { | 601 | { |
601 | return -ENOSYS; | 602 | return -ENOSYS; |
602 | } | 603 | } |
603 | 604 | ||
604 | #endif /* CONFIG_HAVE_KVM_EVENTFD */ | 605 | #endif /* CONFIG_HAVE_KVM_EVENTFD */ |
605 | 606 | ||
606 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 607 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
607 | static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) | 608 | static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) |
608 | { | 609 | { |
609 | return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; | 610 | return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; |
610 | } | 611 | } |
611 | #endif | 612 | #endif |
612 | 613 | ||
613 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | 614 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT |
614 | 615 | ||
615 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | 616 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
616 | unsigned long arg); | 617 | unsigned long arg); |
617 | 618 | ||
618 | #else | 619 | #else |
619 | 620 | ||
620 | static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | 621 | static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
621 | unsigned long arg) | 622 | unsigned long arg) |
622 | { | 623 | { |
623 | return -ENOTTY; | 624 | return -ENOTTY; |
624 | } | 625 | } |
625 | 626 | ||
626 | #endif | 627 | #endif |
627 | 628 | ||
628 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) | 629 | static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) |
629 | { | 630 | { |
630 | set_bit(req, &vcpu->requests); | 631 | set_bit(req, &vcpu->requests); |
631 | } | 632 | } |
632 | 633 | ||
633 | static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu) | 634 | static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu) |
634 | { | 635 | { |
635 | return test_and_set_bit(req, &vcpu->requests); | 636 | return test_and_set_bit(req, &vcpu->requests); |
636 | } | 637 | } |
637 | 638 | ||
638 | static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) | 639 | static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) |
639 | { | 640 | { |
640 | if (test_bit(req, &vcpu->requests)) { | 641 | if (test_bit(req, &vcpu->requests)) { |
641 | clear_bit(req, &vcpu->requests); | 642 | clear_bit(req, &vcpu->requests); |
642 | return true; | 643 | return true; |
643 | } else { | 644 | } else { |
644 | return false; | 645 | return false; |
645 | } | 646 | } |
646 | } | 647 | } |
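Taken together, kvm_make_request(), kvm_make_check_request() and kvm_check_request() implement the vcpu->requests protocol: a producer sets a request bit and kicks the vcpu, and the vcpu's run loop consumes the bit before re-entering the guest. A small usage sketch follows; flush_guest_tlb() is an illustrative arch hook, not a real symbol.

/* Producer side: ask a vcpu to flush its TLB and wake it up. */
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
kvm_vcpu_kick(vcpu);

/* Consumer side, typically at the top of the arch vcpu run loop: */
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
	flush_guest_tlb(vcpu);	/* hypothetical arch-specific flush */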
647 | 648 | ||
648 | #endif | 649 | #endif |
649 | 650 | ||
650 | 651 |
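The functional change visible in this header hunk is the new is_fault_pfn() declaration next to is_error_pfn() and is_hwpoison_pfn(). In line with the commit message, an arch fault path can use it to turn a guest access that hits a memory slot but cannot be backed by a page into an -EFAULT return to userspace instead of an MMIO exit. A hedged sketch of such a check follows; the surrounding fault-handler context (vcpu, gfn, pfn) is assumed and not shown in this hunk.

	pfn = gfn_to_pfn(vcpu->kvm, gfn);
	if (is_fault_pfn(pfn)) {
		/* slot exists but the hva cannot be faulted in: report EFAULT */
		kvm_release_pfn_clean(pfn);
		return -EFAULT;
	}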
virt/kvm/kvm_main.c
1 | /* | 1 | /* |
2 | * Kernel-based Virtual Machine driver for Linux | 2 | * Kernel-based Virtual Machine driver for Linux |
3 | * | 3 | * |
4 | * This module enables machines with Intel VT-x extensions to run virtual | 4 | * This module enables machines with Intel VT-x extensions to run virtual |
5 | * machines without emulation or binary translation. | 5 | * machines without emulation or binary translation. |
6 | * | 6 | * |
7 | * Copyright (C) 2006 Qumranet, Inc. | 7 | * Copyright (C) 2006 Qumranet, Inc. |
8 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | 8 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. |
9 | * | 9 | * |
10 | * Authors: | 10 | * Authors: |
11 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
12 | * Yaniv Kamay <yaniv@qumranet.com> | 12 | * Yaniv Kamay <yaniv@qumranet.com> |
13 | * | 13 | * |
14 | * This work is licensed under the terms of the GNU GPL, version 2. See | 14 | * This work is licensed under the terms of the GNU GPL, version 2. See |
15 | * the COPYING file in the top-level directory. | 15 | * the COPYING file in the top-level directory. |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include "iodev.h" | 19 | #include "iodev.h" |
20 | 20 | ||
21 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
22 | #include <linux/kvm.h> | 22 | #include <linux/kvm.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
25 | #include <linux/percpu.h> | 25 | #include <linux/percpu.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/miscdevice.h> | 27 | #include <linux/miscdevice.h> |
28 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
29 | #include <linux/reboot.h> | 29 | #include <linux/reboot.h> |
30 | #include <linux/debugfs.h> | 30 | #include <linux/debugfs.h> |
31 | #include <linux/highmem.h> | 31 | #include <linux/highmem.h> |
32 | #include <linux/file.h> | 32 | #include <linux/file.h> |
33 | #include <linux/sysdev.h> | 33 | #include <linux/sysdev.h> |
34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/cpumask.h> | 36 | #include <linux/cpumask.h> |
37 | #include <linux/smp.h> | 37 | #include <linux/smp.h> |
38 | #include <linux/anon_inodes.h> | 38 | #include <linux/anon_inodes.h> |
39 | #include <linux/profile.h> | 39 | #include <linux/profile.h> |
40 | #include <linux/kvm_para.h> | 40 | #include <linux/kvm_para.h> |
41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
42 | #include <linux/mman.h> | 42 | #include <linux/mman.h> |
43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
44 | #include <linux/bitops.h> | 44 | #include <linux/bitops.h> |
45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
46 | #include <linux/compat.h> | 46 | #include <linux/compat.h> |
47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
48 | #include <linux/hugetlb.h> | 48 | #include <linux/hugetlb.h> |
49 | #include <linux/slab.h> | 49 | #include <linux/slab.h> |
50 | 50 | ||
51 | #include <asm/processor.h> | 51 | #include <asm/processor.h> |
52 | #include <asm/io.h> | 52 | #include <asm/io.h> |
53 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
54 | #include <asm/pgtable.h> | 54 | #include <asm/pgtable.h> |
55 | #include <asm-generic/bitops/le.h> | 55 | #include <asm-generic/bitops/le.h> |
56 | 56 | ||
57 | #include "coalesced_mmio.h" | 57 | #include "coalesced_mmio.h" |
58 | 58 | ||
59 | #define CREATE_TRACE_POINTS | 59 | #define CREATE_TRACE_POINTS |
60 | #include <trace/events/kvm.h> | 60 | #include <trace/events/kvm.h> |
61 | 61 | ||
62 | MODULE_AUTHOR("Qumranet"); | 62 | MODULE_AUTHOR("Qumranet"); |
63 | MODULE_LICENSE("GPL"); | 63 | MODULE_LICENSE("GPL"); |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Ordering of locks: | 66 | * Ordering of locks: |
67 | * | 67 | * |
68 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock | 68 | * kvm->lock --> kvm->slots_lock --> kvm->irq_lock |
69 | */ | 69 | */ |
70 | 70 | ||
71 | DEFINE_SPINLOCK(kvm_lock); | 71 | DEFINE_SPINLOCK(kvm_lock); |
72 | LIST_HEAD(vm_list); | 72 | LIST_HEAD(vm_list); |
73 | 73 | ||
74 | static cpumask_var_t cpus_hardware_enabled; | 74 | static cpumask_var_t cpus_hardware_enabled; |
75 | static int kvm_usage_count = 0; | 75 | static int kvm_usage_count = 0; |
76 | static atomic_t hardware_enable_failed; | 76 | static atomic_t hardware_enable_failed; |
77 | 77 | ||
78 | struct kmem_cache *kvm_vcpu_cache; | 78 | struct kmem_cache *kvm_vcpu_cache; |
79 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | 79 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); |
80 | 80 | ||
81 | static __read_mostly struct preempt_ops kvm_preempt_ops; | 81 | static __read_mostly struct preempt_ops kvm_preempt_ops; |
82 | 82 | ||
83 | struct dentry *kvm_debugfs_dir; | 83 | struct dentry *kvm_debugfs_dir; |
84 | 84 | ||
85 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 85 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
86 | unsigned long arg); | 86 | unsigned long arg); |
87 | static int hardware_enable_all(void); | 87 | static int hardware_enable_all(void); |
88 | static void hardware_disable_all(void); | 88 | static void hardware_disable_all(void); |
89 | 89 | ||
90 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | 90 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); |
91 | 91 | ||
92 | static bool kvm_rebooting; | 92 | static bool kvm_rebooting; |
93 | 93 | ||
94 | static bool largepages_enabled = true; | 94 | static bool largepages_enabled = true; |
95 | 95 | ||
96 | static struct page *hwpoison_page; | 96 | static struct page *hwpoison_page; |
97 | static pfn_t hwpoison_pfn; | 97 | static pfn_t hwpoison_pfn; |
98 | 98 | ||
99 | static struct page *fault_page; | ||
100 | static pfn_t fault_pfn; | ||
101 | |||
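fault_page and fault_pfn above are new in this commit and mirror the existing hwpoison_page/hwpoison_pfn pair: a distinguished page whose pfn gfn_to_pfn() can hand back when the host mapping cannot be faulted in, so that is_fault_pfn() (declared in kvm_host.h) can recognise the condition later. The sketch below shows the likely wiring, under the assumption that the allocation sits in kvm_init() next to the hwpoison page and that the predicate is a simple comparison; the hunks containing the real code are not shown here.

int is_fault_pfn(pfn_t pfn)
{
	return pfn == fault_pfn;
}

/* and, presumably, in kvm_init(): */
	fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (fault_page == NULL)
		return -ENOMEM;	/* error unwinding omitted in this sketch */
	fault_pfn = page_to_pfn(fault_page);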
99 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 102 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
100 | { | 103 | { |
101 | if (pfn_valid(pfn)) { | 104 | if (pfn_valid(pfn)) { |
102 | struct page *page = compound_head(pfn_to_page(pfn)); | 105 | struct page *page = compound_head(pfn_to_page(pfn)); |
103 | return PageReserved(page); | 106 | return PageReserved(page); |
104 | } | 107 | } |
105 | 108 | ||
106 | return true; | 109 | return true; |
107 | } | 110 | } |
108 | 111 | ||
109 | /* | 112 | /* |
110 | * Switches to the specified vcpu, until a matching vcpu_put() | 113 | * Switches to the specified vcpu, until a matching vcpu_put() |
111 | */ | 114 | */ |
112 | void vcpu_load(struct kvm_vcpu *vcpu) | 115 | void vcpu_load(struct kvm_vcpu *vcpu) |
113 | { | 116 | { |
114 | int cpu; | 117 | int cpu; |
115 | 118 | ||
116 | mutex_lock(&vcpu->mutex); | 119 | mutex_lock(&vcpu->mutex); |
117 | cpu = get_cpu(); | 120 | cpu = get_cpu(); |
118 | preempt_notifier_register(&vcpu->preempt_notifier); | 121 | preempt_notifier_register(&vcpu->preempt_notifier); |
119 | kvm_arch_vcpu_load(vcpu, cpu); | 122 | kvm_arch_vcpu_load(vcpu, cpu); |
120 | put_cpu(); | 123 | put_cpu(); |
121 | } | 124 | } |
122 | 125 | ||
123 | void vcpu_put(struct kvm_vcpu *vcpu) | 126 | void vcpu_put(struct kvm_vcpu *vcpu) |
124 | { | 127 | { |
125 | preempt_disable(); | 128 | preempt_disable(); |
126 | kvm_arch_vcpu_put(vcpu); | 129 | kvm_arch_vcpu_put(vcpu); |
127 | preempt_notifier_unregister(&vcpu->preempt_notifier); | 130 | preempt_notifier_unregister(&vcpu->preempt_notifier); |
128 | preempt_enable(); | 131 | preempt_enable(); |
129 | mutex_unlock(&vcpu->mutex); | 132 | mutex_unlock(&vcpu->mutex); |
130 | } | 133 | } |
131 | 134 | ||
132 | static void ack_flush(void *_completed) | 135 | static void ack_flush(void *_completed) |
133 | { | 136 | { |
134 | } | 137 | } |
135 | 138 | ||
136 | static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | 139 | static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) |
137 | { | 140 | { |
138 | int i, cpu, me; | 141 | int i, cpu, me; |
139 | cpumask_var_t cpus; | 142 | cpumask_var_t cpus; |
140 | bool called = true; | 143 | bool called = true; |
141 | struct kvm_vcpu *vcpu; | 144 | struct kvm_vcpu *vcpu; |
142 | 145 | ||
143 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); | 146 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); |
144 | 147 | ||
145 | raw_spin_lock(&kvm->requests_lock); | 148 | raw_spin_lock(&kvm->requests_lock); |
146 | me = smp_processor_id(); | 149 | me = smp_processor_id(); |
147 | kvm_for_each_vcpu(i, vcpu, kvm) { | 150 | kvm_for_each_vcpu(i, vcpu, kvm) { |
148 | if (kvm_make_check_request(req, vcpu)) | 151 | if (kvm_make_check_request(req, vcpu)) |
149 | continue; | 152 | continue; |
150 | cpu = vcpu->cpu; | 153 | cpu = vcpu->cpu; |
151 | if (cpus != NULL && cpu != -1 && cpu != me) | 154 | if (cpus != NULL && cpu != -1 && cpu != me) |
152 | cpumask_set_cpu(cpu, cpus); | 155 | cpumask_set_cpu(cpu, cpus); |
153 | } | 156 | } |
154 | if (unlikely(cpus == NULL)) | 157 | if (unlikely(cpus == NULL)) |
155 | smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); | 158 | smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); |
156 | else if (!cpumask_empty(cpus)) | 159 | else if (!cpumask_empty(cpus)) |
157 | smp_call_function_many(cpus, ack_flush, NULL, 1); | 160 | smp_call_function_many(cpus, ack_flush, NULL, 1); |
158 | else | 161 | else |
159 | called = false; | 162 | called = false; |
160 | raw_spin_unlock(&kvm->requests_lock); | 163 | raw_spin_unlock(&kvm->requests_lock); |
161 | free_cpumask_var(cpus); | 164 | free_cpumask_var(cpus); |
162 | return called; | 165 | return called; |
163 | } | 166 | } |
164 | 167 | ||
165 | void kvm_flush_remote_tlbs(struct kvm *kvm) | 168 | void kvm_flush_remote_tlbs(struct kvm *kvm) |
166 | { | 169 | { |
167 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) | 170 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) |
168 | ++kvm->stat.remote_tlb_flush; | 171 | ++kvm->stat.remote_tlb_flush; |
169 | } | 172 | } |
170 | 173 | ||
171 | void kvm_reload_remote_mmus(struct kvm *kvm) | 174 | void kvm_reload_remote_mmus(struct kvm *kvm) |
172 | { | 175 | { |
173 | make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); | 176 | make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); |
174 | } | 177 | } |
175 | 178 | ||
176 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 179 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
177 | { | 180 | { |
178 | struct page *page; | 181 | struct page *page; |
179 | int r; | 182 | int r; |
180 | 183 | ||
181 | mutex_init(&vcpu->mutex); | 184 | mutex_init(&vcpu->mutex); |
182 | vcpu->cpu = -1; | 185 | vcpu->cpu = -1; |
183 | vcpu->kvm = kvm; | 186 | vcpu->kvm = kvm; |
184 | vcpu->vcpu_id = id; | 187 | vcpu->vcpu_id = id; |
185 | init_waitqueue_head(&vcpu->wq); | 188 | init_waitqueue_head(&vcpu->wq); |
186 | 189 | ||
187 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 190 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
188 | if (!page) { | 191 | if (!page) { |
189 | r = -ENOMEM; | 192 | r = -ENOMEM; |
190 | goto fail; | 193 | goto fail; |
191 | } | 194 | } |
192 | vcpu->run = page_address(page); | 195 | vcpu->run = page_address(page); |
193 | 196 | ||
194 | r = kvm_arch_vcpu_init(vcpu); | 197 | r = kvm_arch_vcpu_init(vcpu); |
195 | if (r < 0) | 198 | if (r < 0) |
196 | goto fail_free_run; | 199 | goto fail_free_run; |
197 | return 0; | 200 | return 0; |
198 | 201 | ||
199 | fail_free_run: | 202 | fail_free_run: |
200 | free_page((unsigned long)vcpu->run); | 203 | free_page((unsigned long)vcpu->run); |
201 | fail: | 204 | fail: |
202 | return r; | 205 | return r; |
203 | } | 206 | } |
204 | EXPORT_SYMBOL_GPL(kvm_vcpu_init); | 207 | EXPORT_SYMBOL_GPL(kvm_vcpu_init); |
205 | 208 | ||
206 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) | 209 | void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) |
207 | { | 210 | { |
208 | kvm_arch_vcpu_uninit(vcpu); | 211 | kvm_arch_vcpu_uninit(vcpu); |
209 | free_page((unsigned long)vcpu->run); | 212 | free_page((unsigned long)vcpu->run); |
210 | } | 213 | } |
211 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); | 214 | EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); |
212 | 215 | ||
213 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 216 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
214 | static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) | 217 | static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) |
215 | { | 218 | { |
216 | return container_of(mn, struct kvm, mmu_notifier); | 219 | return container_of(mn, struct kvm, mmu_notifier); |
217 | } | 220 | } |
218 | 221 | ||
219 | static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | 222 | static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, |
220 | struct mm_struct *mm, | 223 | struct mm_struct *mm, |
221 | unsigned long address) | 224 | unsigned long address) |
222 | { | 225 | { |
223 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 226 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
224 | int need_tlb_flush, idx; | 227 | int need_tlb_flush, idx; |
225 | 228 | ||
226 | /* | 229 | /* |
227 | * When ->invalidate_page runs, the linux pte has been zapped | 230 | * When ->invalidate_page runs, the linux pte has been zapped |
228 | * already but the page is still allocated until | 231 | * already but the page is still allocated until |
229 | * ->invalidate_page returns. So if we increase the sequence | 232 | * ->invalidate_page returns. So if we increase the sequence |
230 | * here the kvm page fault will notice if the spte can't be | 233 | * here the kvm page fault will notice if the spte can't be |
231 | * established because the page is going to be freed. If | 234 | * established because the page is going to be freed. If |
232 | * instead the kvm page fault establishes the spte before | 235 | * instead the kvm page fault establishes the spte before |
233 | * ->invalidate_page runs, kvm_unmap_hva will release it | 236 | * ->invalidate_page runs, kvm_unmap_hva will release it |
234 | * before returning. | 237 | * before returning. |
235 | * | 238 | * |
236 | * The sequence increase only needs to be seen at spin_unlock | 239 | * The sequence increase only needs to be seen at spin_unlock |
237 | * time, and not at spin_lock time. | 240 | * time, and not at spin_lock time. |
238 | * | 241 | * |
239 | * Increasing the sequence after the spin_unlock would be | 242 | * Increasing the sequence after the spin_unlock would be |
240 | * unsafe because the kvm page fault could then establish the | 243 | * unsafe because the kvm page fault could then establish the |
241 | * pte after kvm_unmap_hva returned, without noticing the page | 244 | * pte after kvm_unmap_hva returned, without noticing the page |
242 | * is going to be freed. | 245 | * is going to be freed. |
243 | */ | 246 | */ |
244 | idx = srcu_read_lock(&kvm->srcu); | 247 | idx = srcu_read_lock(&kvm->srcu); |
245 | spin_lock(&kvm->mmu_lock); | 248 | spin_lock(&kvm->mmu_lock); |
246 | kvm->mmu_notifier_seq++; | 249 | kvm->mmu_notifier_seq++; |
247 | need_tlb_flush = kvm_unmap_hva(kvm, address); | 250 | need_tlb_flush = kvm_unmap_hva(kvm, address); |
248 | spin_unlock(&kvm->mmu_lock); | 251 | spin_unlock(&kvm->mmu_lock); |
249 | srcu_read_unlock(&kvm->srcu, idx); | 252 | srcu_read_unlock(&kvm->srcu, idx); |
250 | 253 | ||
251 | /* we have to flush the tlb before the pages can be freed */ | 254 | /* we have to flush the tlb before the pages can be freed */ |
252 | if (need_tlb_flush) | 255 | if (need_tlb_flush) |
253 | kvm_flush_remote_tlbs(kvm); | 256 | kvm_flush_remote_tlbs(kvm); |
254 | 257 | ||
255 | } | 258 | } |
256 | 259 | ||
257 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | 260 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, |
258 | struct mm_struct *mm, | 261 | struct mm_struct *mm, |
259 | unsigned long address, | 262 | unsigned long address, |
260 | pte_t pte) | 263 | pte_t pte) |
261 | { | 264 | { |
262 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 265 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
263 | int idx; | 266 | int idx; |
264 | 267 | ||
265 | idx = srcu_read_lock(&kvm->srcu); | 268 | idx = srcu_read_lock(&kvm->srcu); |
266 | spin_lock(&kvm->mmu_lock); | 269 | spin_lock(&kvm->mmu_lock); |
267 | kvm->mmu_notifier_seq++; | 270 | kvm->mmu_notifier_seq++; |
268 | kvm_set_spte_hva(kvm, address, pte); | 271 | kvm_set_spte_hva(kvm, address, pte); |
269 | spin_unlock(&kvm->mmu_lock); | 272 | spin_unlock(&kvm->mmu_lock); |
270 | srcu_read_unlock(&kvm->srcu, idx); | 273 | srcu_read_unlock(&kvm->srcu, idx); |
271 | } | 274 | } |
272 | 275 | ||
273 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | 276 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, |
274 | struct mm_struct *mm, | 277 | struct mm_struct *mm, |
275 | unsigned long start, | 278 | unsigned long start, |
276 | unsigned long end) | 279 | unsigned long end) |
277 | { | 280 | { |
278 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 281 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
279 | int need_tlb_flush = 0, idx; | 282 | int need_tlb_flush = 0, idx; |
280 | 283 | ||
281 | idx = srcu_read_lock(&kvm->srcu); | 284 | idx = srcu_read_lock(&kvm->srcu); |
282 | spin_lock(&kvm->mmu_lock); | 285 | spin_lock(&kvm->mmu_lock); |
283 | /* | 286 | /* |
284 | * The count increase must become visible at unlock time as no | 287 | * The count increase must become visible at unlock time as no |
285 | * spte can be established without taking the mmu_lock and | 288 | * spte can be established without taking the mmu_lock and |
286 | * count is also read inside the mmu_lock critical section. | 289 | * count is also read inside the mmu_lock critical section. |
287 | */ | 290 | */ |
288 | kvm->mmu_notifier_count++; | 291 | kvm->mmu_notifier_count++; |
289 | for (; start < end; start += PAGE_SIZE) | 292 | for (; start < end; start += PAGE_SIZE) |
290 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | 293 | need_tlb_flush |= kvm_unmap_hva(kvm, start); |
291 | spin_unlock(&kvm->mmu_lock); | 294 | spin_unlock(&kvm->mmu_lock); |
292 | srcu_read_unlock(&kvm->srcu, idx); | 295 | srcu_read_unlock(&kvm->srcu, idx); |
293 | 296 | ||
294 | /* we have to flush the TLB before the pages can be freed */ | 297 | /* we have to flush the TLB before the pages can be freed */ |
295 | if (need_tlb_flush) | 298 | if (need_tlb_flush) |
296 | kvm_flush_remote_tlbs(kvm); | 299 | kvm_flush_remote_tlbs(kvm); |
297 | } | 300 | } |
298 | 301 | ||
299 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | 302 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, |
300 | struct mm_struct *mm, | 303 | struct mm_struct *mm, |
301 | unsigned long start, | 304 | unsigned long start, |
302 | unsigned long end) | 305 | unsigned long end) |
303 | { | 306 | { |
304 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 307 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
305 | 308 | ||
306 | spin_lock(&kvm->mmu_lock); | 309 | spin_lock(&kvm->mmu_lock); |
307 | /* | 310 | /* |
308 | * This sequence increase will notify the kvm page fault that | 311 | * This sequence increase will notify the kvm page fault that |
309 | * the page that is going to be mapped in the spte could have | 312 | * the page that is going to be mapped in the spte could have |
310 | * been freed. | 313 | * been freed. |
311 | */ | 314 | */ |
312 | kvm->mmu_notifier_seq++; | 315 | kvm->mmu_notifier_seq++; |
313 | /* | 316 | /* |
314 | * The above sequence increase must be visible before the | 317 | * The above sequence increase must be visible before the |
315 | * below count decrease but both values are read by the kvm | 318 | * below count decrease but both values are read by the kvm |
316 | * page fault under mmu_lock spinlock so we don't need to add | 319 | * page fault under mmu_lock spinlock so we don't need to add |
317 | * a smp_wmb() here in between the two. | 320 | * a smp_wmb() here in between the two. |
318 | */ | 321 | */ |
319 | kvm->mmu_notifier_count--; | 322 | kvm->mmu_notifier_count--; |
320 | spin_unlock(&kvm->mmu_lock); | 323 | spin_unlock(&kvm->mmu_lock); |
321 | 324 | ||
322 | BUG_ON(kvm->mmu_notifier_count < 0); | 325 | BUG_ON(kvm->mmu_notifier_count < 0); |
323 | } | 326 | } |
324 | 327 | ||
325 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | 328 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, |
326 | struct mm_struct *mm, | 329 | struct mm_struct *mm, |
327 | unsigned long address) | 330 | unsigned long address) |
328 | { | 331 | { |
329 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 332 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
330 | int young, idx; | 333 | int young, idx; |
331 | 334 | ||
332 | idx = srcu_read_lock(&kvm->srcu); | 335 | idx = srcu_read_lock(&kvm->srcu); |
333 | spin_lock(&kvm->mmu_lock); | 336 | spin_lock(&kvm->mmu_lock); |
334 | young = kvm_age_hva(kvm, address); | 337 | young = kvm_age_hva(kvm, address); |
335 | spin_unlock(&kvm->mmu_lock); | 338 | spin_unlock(&kvm->mmu_lock); |
336 | srcu_read_unlock(&kvm->srcu, idx); | 339 | srcu_read_unlock(&kvm->srcu, idx); |
337 | 340 | ||
338 | if (young) | 341 | if (young) |
339 | kvm_flush_remote_tlbs(kvm); | 342 | kvm_flush_remote_tlbs(kvm); |
340 | 343 | ||
341 | return young; | 344 | return young; |
342 | } | 345 | } |
343 | 346 | ||
344 | static void kvm_mmu_notifier_release(struct mmu_notifier *mn, | 347 | static void kvm_mmu_notifier_release(struct mmu_notifier *mn, |
345 | struct mm_struct *mm) | 348 | struct mm_struct *mm) |
346 | { | 349 | { |
347 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 350 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
348 | int idx; | 351 | int idx; |
349 | 352 | ||
350 | idx = srcu_read_lock(&kvm->srcu); | 353 | idx = srcu_read_lock(&kvm->srcu); |
351 | kvm_arch_flush_shadow(kvm); | 354 | kvm_arch_flush_shadow(kvm); |
352 | srcu_read_unlock(&kvm->srcu, idx); | 355 | srcu_read_unlock(&kvm->srcu, idx); |
353 | } | 356 | } |
354 | 357 | ||
355 | static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | 358 | static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { |
356 | .invalidate_page = kvm_mmu_notifier_invalidate_page, | 359 | .invalidate_page = kvm_mmu_notifier_invalidate_page, |
357 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | 360 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, |
358 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | 361 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, |
359 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | 362 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, |
360 | .change_pte = kvm_mmu_notifier_change_pte, | 363 | .change_pte = kvm_mmu_notifier_change_pte, |
361 | .release = kvm_mmu_notifier_release, | 364 | .release = kvm_mmu_notifier_release, |
362 | }; | 365 | }; |
363 | 366 | ||
364 | static int kvm_init_mmu_notifier(struct kvm *kvm) | 367 | static int kvm_init_mmu_notifier(struct kvm *kvm) |
365 | { | 368 | { |
366 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | 369 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; |
367 | return mmu_notifier_register(&kvm->mmu_notifier, current->mm); | 370 | return mmu_notifier_register(&kvm->mmu_notifier, current->mm); |
368 | } | 371 | } |
369 | 372 | ||
370 | #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ | 373 | #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ |
371 | 374 | ||
372 | static int kvm_init_mmu_notifier(struct kvm *kvm) | 375 | static int kvm_init_mmu_notifier(struct kvm *kvm) |
373 | { | 376 | { |
374 | return 0; | 377 | return 0; |
375 | } | 378 | } |
376 | 379 | ||
377 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 380 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
378 | 381 | ||
379 | static struct kvm *kvm_create_vm(void) | 382 | static struct kvm *kvm_create_vm(void) |
380 | { | 383 | { |
381 | int r = 0, i; | 384 | int r = 0, i; |
382 | struct kvm *kvm = kvm_arch_create_vm(); | 385 | struct kvm *kvm = kvm_arch_create_vm(); |
383 | 386 | ||
384 | if (IS_ERR(kvm)) | 387 | if (IS_ERR(kvm)) |
385 | goto out; | 388 | goto out; |
386 | 389 | ||
387 | r = hardware_enable_all(); | 390 | r = hardware_enable_all(); |
388 | if (r) | 391 | if (r) |
389 | goto out_err_nodisable; | 392 | goto out_err_nodisable; |
390 | 393 | ||
391 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 394 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
392 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); | 395 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); |
393 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | 396 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); |
394 | #endif | 397 | #endif |
395 | 398 | ||
396 | r = -ENOMEM; | 399 | r = -ENOMEM; |
397 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 400 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
398 | if (!kvm->memslots) | 401 | if (!kvm->memslots) |
399 | goto out_err; | 402 | goto out_err; |
400 | if (init_srcu_struct(&kvm->srcu)) | 403 | if (init_srcu_struct(&kvm->srcu)) |
401 | goto out_err; | 404 | goto out_err; |
402 | for (i = 0; i < KVM_NR_BUSES; i++) { | 405 | for (i = 0; i < KVM_NR_BUSES; i++) { |
403 | kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), | 406 | kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), |
404 | GFP_KERNEL); | 407 | GFP_KERNEL); |
405 | if (!kvm->buses[i]) { | 408 | if (!kvm->buses[i]) { |
406 | cleanup_srcu_struct(&kvm->srcu); | 409 | cleanup_srcu_struct(&kvm->srcu); |
407 | goto out_err; | 410 | goto out_err; |
408 | } | 411 | } |
409 | } | 412 | } |
410 | 413 | ||
411 | r = kvm_init_mmu_notifier(kvm); | 414 | r = kvm_init_mmu_notifier(kvm); |
412 | if (r) { | 415 | if (r) { |
413 | cleanup_srcu_struct(&kvm->srcu); | 416 | cleanup_srcu_struct(&kvm->srcu); |
414 | goto out_err; | 417 | goto out_err; |
415 | } | 418 | } |
416 | 419 | ||
417 | kvm->mm = current->mm; | 420 | kvm->mm = current->mm; |
418 | atomic_inc(&kvm->mm->mm_count); | 421 | atomic_inc(&kvm->mm->mm_count); |
419 | spin_lock_init(&kvm->mmu_lock); | 422 | spin_lock_init(&kvm->mmu_lock); |
420 | raw_spin_lock_init(&kvm->requests_lock); | 423 | raw_spin_lock_init(&kvm->requests_lock); |
421 | kvm_eventfd_init(kvm); | 424 | kvm_eventfd_init(kvm); |
422 | mutex_init(&kvm->lock); | 425 | mutex_init(&kvm->lock); |
423 | mutex_init(&kvm->irq_lock); | 426 | mutex_init(&kvm->irq_lock); |
424 | mutex_init(&kvm->slots_lock); | 427 | mutex_init(&kvm->slots_lock); |
425 | atomic_set(&kvm->users_count, 1); | 428 | atomic_set(&kvm->users_count, 1); |
426 | spin_lock(&kvm_lock); | 429 | spin_lock(&kvm_lock); |
427 | list_add(&kvm->vm_list, &vm_list); | 430 | list_add(&kvm->vm_list, &vm_list); |
428 | spin_unlock(&kvm_lock); | 431 | spin_unlock(&kvm_lock); |
429 | out: | 432 | out: |
430 | return kvm; | 433 | return kvm; |
431 | 434 | ||
432 | out_err: | 435 | out_err: |
433 | hardware_disable_all(); | 436 | hardware_disable_all(); |
434 | out_err_nodisable: | 437 | out_err_nodisable: |
435 | for (i = 0; i < KVM_NR_BUSES; i++) | 438 | for (i = 0; i < KVM_NR_BUSES; i++) |
436 | kfree(kvm->buses[i]); | 439 | kfree(kvm->buses[i]); |
437 | kfree(kvm->memslots); | 440 | kfree(kvm->memslots); |
438 | kfree(kvm); | 441 | kfree(kvm); |
439 | return ERR_PTR(r); | 442 | return ERR_PTR(r); |
440 | } | 443 | } |
441 | 444 | ||
442 | /* | 445 | /* |
443 | * Free any memory in @free but not in @dont. | 446 | * Free any memory in @free but not in @dont. |
444 | */ | 447 | */ |
445 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 448 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
446 | struct kvm_memory_slot *dont) | 449 | struct kvm_memory_slot *dont) |
447 | { | 450 | { |
448 | int i; | 451 | int i; |
449 | 452 | ||
450 | if (!dont || free->rmap != dont->rmap) | 453 | if (!dont || free->rmap != dont->rmap) |
451 | vfree(free->rmap); | 454 | vfree(free->rmap); |
452 | 455 | ||
453 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 456 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
454 | vfree(free->dirty_bitmap); | 457 | vfree(free->dirty_bitmap); |
455 | 458 | ||
456 | 459 | ||
457 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 460 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
458 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { | 461 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { |
459 | vfree(free->lpage_info[i]); | 462 | vfree(free->lpage_info[i]); |
460 | free->lpage_info[i] = NULL; | 463 | free->lpage_info[i] = NULL; |
461 | } | 464 | } |
462 | } | 465 | } |
463 | 466 | ||
464 | free->npages = 0; | 467 | free->npages = 0; |
465 | free->dirty_bitmap = NULL; | 468 | free->dirty_bitmap = NULL; |
466 | free->rmap = NULL; | 469 | free->rmap = NULL; |
467 | } | 470 | } |
468 | 471 | ||
469 | void kvm_free_physmem(struct kvm *kvm) | 472 | void kvm_free_physmem(struct kvm *kvm) |
470 | { | 473 | { |
471 | int i; | 474 | int i; |
472 | struct kvm_memslots *slots = kvm->memslots; | 475 | struct kvm_memslots *slots = kvm->memslots; |
473 | 476 | ||
474 | for (i = 0; i < slots->nmemslots; ++i) | 477 | for (i = 0; i < slots->nmemslots; ++i) |
475 | kvm_free_physmem_slot(&slots->memslots[i], NULL); | 478 | kvm_free_physmem_slot(&slots->memslots[i], NULL); |
476 | 479 | ||
477 | kfree(kvm->memslots); | 480 | kfree(kvm->memslots); |
478 | } | 481 | } |
479 | 482 | ||
480 | static void kvm_destroy_vm(struct kvm *kvm) | 483 | static void kvm_destroy_vm(struct kvm *kvm) |
481 | { | 484 | { |
482 | int i; | 485 | int i; |
483 | struct mm_struct *mm = kvm->mm; | 486 | struct mm_struct *mm = kvm->mm; |
484 | 487 | ||
485 | kvm_arch_sync_events(kvm); | 488 | kvm_arch_sync_events(kvm); |
486 | spin_lock(&kvm_lock); | 489 | spin_lock(&kvm_lock); |
487 | list_del(&kvm->vm_list); | 490 | list_del(&kvm->vm_list); |
488 | spin_unlock(&kvm_lock); | 491 | spin_unlock(&kvm_lock); |
489 | kvm_free_irq_routing(kvm); | 492 | kvm_free_irq_routing(kvm); |
490 | for (i = 0; i < KVM_NR_BUSES; i++) | 493 | for (i = 0; i < KVM_NR_BUSES; i++) |
491 | kvm_io_bus_destroy(kvm->buses[i]); | 494 | kvm_io_bus_destroy(kvm->buses[i]); |
492 | kvm_coalesced_mmio_free(kvm); | 495 | kvm_coalesced_mmio_free(kvm); |
493 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 496 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
494 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 497 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
495 | #else | 498 | #else |
496 | kvm_arch_flush_shadow(kvm); | 499 | kvm_arch_flush_shadow(kvm); |
497 | #endif | 500 | #endif |
498 | kvm_arch_destroy_vm(kvm); | 501 | kvm_arch_destroy_vm(kvm); |
499 | hardware_disable_all(); | 502 | hardware_disable_all(); |
500 | mmdrop(mm); | 503 | mmdrop(mm); |
501 | } | 504 | } |
502 | 505 | ||
503 | void kvm_get_kvm(struct kvm *kvm) | 506 | void kvm_get_kvm(struct kvm *kvm) |
504 | { | 507 | { |
505 | atomic_inc(&kvm->users_count); | 508 | atomic_inc(&kvm->users_count); |
506 | } | 509 | } |
507 | EXPORT_SYMBOL_GPL(kvm_get_kvm); | 510 | EXPORT_SYMBOL_GPL(kvm_get_kvm); |
508 | 511 | ||
509 | void kvm_put_kvm(struct kvm *kvm) | 512 | void kvm_put_kvm(struct kvm *kvm) |
510 | { | 513 | { |
511 | if (atomic_dec_and_test(&kvm->users_count)) | 514 | if (atomic_dec_and_test(&kvm->users_count)) |
512 | kvm_destroy_vm(kvm); | 515 | kvm_destroy_vm(kvm); |
513 | } | 516 | } |
514 | EXPORT_SYMBOL_GPL(kvm_put_kvm); | 517 | EXPORT_SYMBOL_GPL(kvm_put_kvm); |
515 | 518 | ||
516 | 519 | ||
517 | static int kvm_vm_release(struct inode *inode, struct file *filp) | 520 | static int kvm_vm_release(struct inode *inode, struct file *filp) |
518 | { | 521 | { |
519 | struct kvm *kvm = filp->private_data; | 522 | struct kvm *kvm = filp->private_data; |
520 | 523 | ||
521 | kvm_irqfd_release(kvm); | 524 | kvm_irqfd_release(kvm); |
522 | 525 | ||
523 | kvm_put_kvm(kvm); | 526 | kvm_put_kvm(kvm); |
524 | return 0; | 527 | return 0; |
525 | } | 528 | } |
526 | 529 | ||
527 | /* | 530 | /* |
528 | * Allocate some memory and give it an address in the guest physical address | 531 | * Allocate some memory and give it an address in the guest physical address |
529 | * space. | 532 | * space. |
530 | * | 533 | * |
531 | * Discontiguous memory is allowed, mostly for framebuffers. | 534 | * Discontiguous memory is allowed, mostly for framebuffers. |
532 | * | 535 | * |
533 | * Must be called holding mmap_sem for write. | 536 | * Must be called holding mmap_sem for write. |
534 | */ | 537 | */ |
535 | int __kvm_set_memory_region(struct kvm *kvm, | 538 | int __kvm_set_memory_region(struct kvm *kvm, |
536 | struct kvm_userspace_memory_region *mem, | 539 | struct kvm_userspace_memory_region *mem, |
537 | int user_alloc) | 540 | int user_alloc) |
538 | { | 541 | { |
539 | int r, flush_shadow = 0; | 542 | int r, flush_shadow = 0; |
540 | gfn_t base_gfn; | 543 | gfn_t base_gfn; |
541 | unsigned long npages; | 544 | unsigned long npages; |
542 | unsigned long i; | 545 | unsigned long i; |
543 | struct kvm_memory_slot *memslot; | 546 | struct kvm_memory_slot *memslot; |
544 | struct kvm_memory_slot old, new; | 547 | struct kvm_memory_slot old, new; |
545 | struct kvm_memslots *slots, *old_memslots; | 548 | struct kvm_memslots *slots, *old_memslots; |
546 | 549 | ||
547 | r = -EINVAL; | 550 | r = -EINVAL; |
548 | /* General sanity checks */ | 551 | /* General sanity checks */ |
549 | if (mem->memory_size & (PAGE_SIZE - 1)) | 552 | if (mem->memory_size & (PAGE_SIZE - 1)) |
550 | goto out; | 553 | goto out; |
551 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) | 554 | if (mem->guest_phys_addr & (PAGE_SIZE - 1)) |
552 | goto out; | 555 | goto out; |
553 | if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) | 556 | if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) |
554 | goto out; | 557 | goto out; |
555 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | 558 | if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) |
556 | goto out; | 559 | goto out; |
557 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) | 560 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) |
558 | goto out; | 561 | goto out; |
559 | 562 | ||
560 | memslot = &kvm->memslots->memslots[mem->slot]; | 563 | memslot = &kvm->memslots->memslots[mem->slot]; |
561 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 564 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
562 | npages = mem->memory_size >> PAGE_SHIFT; | 565 | npages = mem->memory_size >> PAGE_SHIFT; |
563 | 566 | ||
564 | r = -EINVAL; | 567 | r = -EINVAL; |
565 | if (npages > KVM_MEM_MAX_NR_PAGES) | 568 | if (npages > KVM_MEM_MAX_NR_PAGES) |
566 | goto out; | 569 | goto out; |
567 | 570 | ||
568 | if (!npages) | 571 | if (!npages) |
569 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; | 572 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; |
570 | 573 | ||
571 | new = old = *memslot; | 574 | new = old = *memslot; |
572 | 575 | ||
573 | new.id = mem->slot; | 576 | new.id = mem->slot; |
574 | new.base_gfn = base_gfn; | 577 | new.base_gfn = base_gfn; |
575 | new.npages = npages; | 578 | new.npages = npages; |
576 | new.flags = mem->flags; | 579 | new.flags = mem->flags; |
577 | 580 | ||
578 | /* Disallow changing a memory slot's size. */ | 581 | /* Disallow changing a memory slot's size. */ |
579 | r = -EINVAL; | 582 | r = -EINVAL; |
580 | if (npages && old.npages && npages != old.npages) | 583 | if (npages && old.npages && npages != old.npages) |
581 | goto out_free; | 584 | goto out_free; |
582 | 585 | ||
583 | /* Check for overlaps */ | 586 | /* Check for overlaps */ |
584 | r = -EEXIST; | 587 | r = -EEXIST; |
585 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 588 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
586 | struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; | 589 | struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; |
587 | 590 | ||
588 | if (s == memslot || !s->npages) | 591 | if (s == memslot || !s->npages) |
589 | continue; | 592 | continue; |
590 | if (!((base_gfn + npages <= s->base_gfn) || | 593 | if (!((base_gfn + npages <= s->base_gfn) || |
591 | (base_gfn >= s->base_gfn + s->npages))) | 594 | (base_gfn >= s->base_gfn + s->npages))) |
592 | goto out_free; | 595 | goto out_free; |
593 | } | 596 | } |
594 | 597 | ||
595 | /* Free page dirty bitmap if unneeded */ | 598 | /* Free page dirty bitmap if unneeded */ |
596 | if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) | 599 | if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) |
597 | new.dirty_bitmap = NULL; | 600 | new.dirty_bitmap = NULL; |
598 | 601 | ||
599 | r = -ENOMEM; | 602 | r = -ENOMEM; |
600 | 603 | ||
601 | /* Allocate if a slot is being created */ | 604 | /* Allocate if a slot is being created */ |
602 | #ifndef CONFIG_S390 | 605 | #ifndef CONFIG_S390 |
603 | if (npages && !new.rmap) { | 606 | if (npages && !new.rmap) { |
604 | new.rmap = vmalloc(npages * sizeof(*new.rmap)); | 607 | new.rmap = vmalloc(npages * sizeof(*new.rmap)); |
605 | 608 | ||
606 | if (!new.rmap) | 609 | if (!new.rmap) |
607 | goto out_free; | 610 | goto out_free; |
608 | 611 | ||
609 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); | 612 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); |
610 | 613 | ||
611 | new.user_alloc = user_alloc; | 614 | new.user_alloc = user_alloc; |
612 | new.userspace_addr = mem->userspace_addr; | 615 | new.userspace_addr = mem->userspace_addr; |
613 | } | 616 | } |
614 | if (!npages) | 617 | if (!npages) |
615 | goto skip_lpage; | 618 | goto skip_lpage; |
616 | 619 | ||
617 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 620 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
618 | unsigned long ugfn; | 621 | unsigned long ugfn; |
619 | unsigned long j; | 622 | unsigned long j; |
620 | int lpages; | 623 | int lpages; |
621 | int level = i + 2; | 624 | int level = i + 2; |
622 | 625 | ||
623 | /* Avoid unused variable warning if no large pages */ | 626 | /* Avoid unused variable warning if no large pages */ |
624 | (void)level; | 627 | (void)level; |
625 | 628 | ||
626 | if (new.lpage_info[i]) | 629 | if (new.lpage_info[i]) |
627 | continue; | 630 | continue; |
628 | 631 | ||
629 | lpages = 1 + ((base_gfn + npages - 1) | 632 | lpages = 1 + ((base_gfn + npages - 1) |
630 | >> KVM_HPAGE_GFN_SHIFT(level)); | 633 | >> KVM_HPAGE_GFN_SHIFT(level)); |
631 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); | 634 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); |
632 | 635 | ||
633 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); | 636 | new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); |
634 | 637 | ||
635 | if (!new.lpage_info[i]) | 638 | if (!new.lpage_info[i]) |
636 | goto out_free; | 639 | goto out_free; |
637 | 640 | ||
638 | memset(new.lpage_info[i], 0, | 641 | memset(new.lpage_info[i], 0, |
639 | lpages * sizeof(*new.lpage_info[i])); | 642 | lpages * sizeof(*new.lpage_info[i])); |
640 | 643 | ||
641 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | 644 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) |
642 | new.lpage_info[i][0].write_count = 1; | 645 | new.lpage_info[i][0].write_count = 1; |
643 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | 646 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) |
644 | new.lpage_info[i][lpages - 1].write_count = 1; | 647 | new.lpage_info[i][lpages - 1].write_count = 1; |
645 | ugfn = new.userspace_addr >> PAGE_SHIFT; | 648 | ugfn = new.userspace_addr >> PAGE_SHIFT; |
646 | /* | 649 | /* |
647 | * If the gfn and userspace address are not aligned wrt each | 650 | * If the gfn and userspace address are not aligned wrt each |
648 | * other, or if explicitly asked to, disable large page | 651 | * other, or if explicitly asked to, disable large page |
649 | * support for this slot | 652 | * support for this slot |
650 | */ | 653 | */ |
651 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || | 654 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || |
652 | !largepages_enabled) | 655 | !largepages_enabled) |
653 | for (j = 0; j < lpages; ++j) | 656 | for (j = 0; j < lpages; ++j) |
654 | new.lpage_info[i][j].write_count = 1; | 657 | new.lpage_info[i][j].write_count = 1; |
655 | } | 658 | } |
656 | 659 | ||
657 | skip_lpage: | 660 | skip_lpage: |
658 | 661 | ||
659 | /* Allocate page dirty bitmap if needed */ | 662 | /* Allocate page dirty bitmap if needed */ |
660 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 663 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
661 | unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); | 664 | unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); |
662 | 665 | ||
663 | new.dirty_bitmap = vmalloc(dirty_bytes); | 666 | new.dirty_bitmap = vmalloc(dirty_bytes); |
664 | if (!new.dirty_bitmap) | 667 | if (!new.dirty_bitmap) |
665 | goto out_free; | 668 | goto out_free; |
666 | memset(new.dirty_bitmap, 0, dirty_bytes); | 669 | memset(new.dirty_bitmap, 0, dirty_bytes); |
667 | /* destroy any largepage mappings for dirty tracking */ | 670 | /* destroy any largepage mappings for dirty tracking */ |
668 | if (old.npages) | 671 | if (old.npages) |
669 | flush_shadow = 1; | 672 | flush_shadow = 1; |
670 | } | 673 | } |
671 | #else /* not defined CONFIG_S390 */ | 674 | #else /* not defined CONFIG_S390 */ |
672 | new.user_alloc = user_alloc; | 675 | new.user_alloc = user_alloc; |
673 | if (user_alloc) | 676 | if (user_alloc) |
674 | new.userspace_addr = mem->userspace_addr; | 677 | new.userspace_addr = mem->userspace_addr; |
675 | #endif /* not defined CONFIG_S390 */ | 678 | #endif /* not defined CONFIG_S390 */ |
676 | 679 | ||
677 | if (!npages) { | 680 | if (!npages) { |
678 | r = -ENOMEM; | 681 | r = -ENOMEM; |
679 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 682 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
680 | if (!slots) | 683 | if (!slots) |
681 | goto out_free; | 684 | goto out_free; |
682 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 685 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
683 | if (mem->slot >= slots->nmemslots) | 686 | if (mem->slot >= slots->nmemslots) |
684 | slots->nmemslots = mem->slot + 1; | 687 | slots->nmemslots = mem->slot + 1; |
685 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; | 688 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; |
686 | 689 | ||
687 | old_memslots = kvm->memslots; | 690 | old_memslots = kvm->memslots; |
688 | rcu_assign_pointer(kvm->memslots, slots); | 691 | rcu_assign_pointer(kvm->memslots, slots); |
689 | synchronize_srcu_expedited(&kvm->srcu); | 692 | synchronize_srcu_expedited(&kvm->srcu); |
690 | /* From this point no new shadow pages pointing to a deleted | 693 | /* From this point no new shadow pages pointing to a deleted |
691 | * memslot will be created. | 694 | * memslot will be created. |
692 | * | 695 | * |
693 | * validation of sp->gfn happens in: | 696 | * validation of sp->gfn happens in: |
694 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) | 697 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) |
695 | * - kvm_is_visible_gfn (mmu_check_roots) | 698 | * - kvm_is_visible_gfn (mmu_check_roots) |
696 | */ | 699 | */ |
697 | kvm_arch_flush_shadow(kvm); | 700 | kvm_arch_flush_shadow(kvm); |
698 | kfree(old_memslots); | 701 | kfree(old_memslots); |
699 | } | 702 | } |
700 | 703 | ||
701 | r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); | 704 | r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); |
702 | if (r) | 705 | if (r) |
703 | goto out_free; | 706 | goto out_free; |
704 | 707 | ||
705 | #ifdef CONFIG_DMAR | 708 | #ifdef CONFIG_DMAR |
706 | /* map the pages in iommu page table */ | 709 | /* map the pages in iommu page table */ |
707 | if (npages) { | 710 | if (npages) { |
708 | r = kvm_iommu_map_pages(kvm, &new); | 711 | r = kvm_iommu_map_pages(kvm, &new); |
709 | if (r) | 712 | if (r) |
710 | goto out_free; | 713 | goto out_free; |
711 | } | 714 | } |
712 | #endif | 715 | #endif |
713 | 716 | ||
714 | r = -ENOMEM; | 717 | r = -ENOMEM; |
715 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 718 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
716 | if (!slots) | 719 | if (!slots) |
717 | goto out_free; | 720 | goto out_free; |
718 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 721 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
719 | if (mem->slot >= slots->nmemslots) | 722 | if (mem->slot >= slots->nmemslots) |
720 | slots->nmemslots = mem->slot + 1; | 723 | slots->nmemslots = mem->slot + 1; |
721 | 724 | ||
722 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 725 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
723 | if (!npages) { | 726 | if (!npages) { |
724 | new.rmap = NULL; | 727 | new.rmap = NULL; |
725 | new.dirty_bitmap = NULL; | 728 | new.dirty_bitmap = NULL; |
726 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) | 729 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) |
727 | new.lpage_info[i] = NULL; | 730 | new.lpage_info[i] = NULL; |
728 | } | 731 | } |
729 | 732 | ||
730 | slots->memslots[mem->slot] = new; | 733 | slots->memslots[mem->slot] = new; |
731 | old_memslots = kvm->memslots; | 734 | old_memslots = kvm->memslots; |
732 | rcu_assign_pointer(kvm->memslots, slots); | 735 | rcu_assign_pointer(kvm->memslots, slots); |
733 | synchronize_srcu_expedited(&kvm->srcu); | 736 | synchronize_srcu_expedited(&kvm->srcu); |
734 | 737 | ||
735 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | 738 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); |
736 | 739 | ||
737 | kvm_free_physmem_slot(&old, &new); | 740 | kvm_free_physmem_slot(&old, &new); |
738 | kfree(old_memslots); | 741 | kfree(old_memslots); |
739 | 742 | ||
740 | if (flush_shadow) | 743 | if (flush_shadow) |
741 | kvm_arch_flush_shadow(kvm); | 744 | kvm_arch_flush_shadow(kvm); |
742 | 745 | ||
743 | return 0; | 746 | return 0; |
744 | 747 | ||
745 | out_free: | 748 | out_free: |
746 | kvm_free_physmem_slot(&new, &old); | 749 | kvm_free_physmem_slot(&new, &old); |
747 | out: | 750 | out: |
748 | return r; | 751 | return r; |
749 | 752 | ||
750 | } | 753 | } |
751 | EXPORT_SYMBOL_GPL(__kvm_set_memory_region); | 754 | EXPORT_SYMBOL_GPL(__kvm_set_memory_region); |
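The slot update above is the writer side of an SRCU-protected publish: a new kvm_memslots copy is installed with rcu_assign_pointer() and synchronize_srcu_expedited() waits out existing readers. A minimal sketch of the matching reader side, using helpers that appear later in this file (kvm and gfn are assumed to be in scope):

	int idx;
	struct kvm_memory_slot *slot;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);	/* walks kvm_memslots(kvm) under SRCU */
	if (slot && !(slot->flags & KVM_MEMSLOT_INVALID)) {
		/* slot->userspace_addr, slot->npages, ... are stable here */
	}
	srcu_read_unlock(&kvm->srcu, idx);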
752 | 755 | ||
753 | int kvm_set_memory_region(struct kvm *kvm, | 756 | int kvm_set_memory_region(struct kvm *kvm, |
754 | struct kvm_userspace_memory_region *mem, | 757 | struct kvm_userspace_memory_region *mem, |
755 | int user_alloc) | 758 | int user_alloc) |
756 | { | 759 | { |
757 | int r; | 760 | int r; |
758 | 761 | ||
759 | mutex_lock(&kvm->slots_lock); | 762 | mutex_lock(&kvm->slots_lock); |
760 | r = __kvm_set_memory_region(kvm, mem, user_alloc); | 763 | r = __kvm_set_memory_region(kvm, mem, user_alloc); |
761 | mutex_unlock(&kvm->slots_lock); | 764 | mutex_unlock(&kvm->slots_lock); |
762 | return r; | 765 | return r; |
763 | } | 766 | } |
764 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); | 767 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); |
765 | 768 | ||
766 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, | 769 | int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, |
767 | struct | 770 | struct |
768 | kvm_userspace_memory_region *mem, | 771 | kvm_userspace_memory_region *mem, |
769 | int user_alloc) | 772 | int user_alloc) |
770 | { | 773 | { |
771 | if (mem->slot >= KVM_MEMORY_SLOTS) | 774 | if (mem->slot >= KVM_MEMORY_SLOTS) |
772 | return -EINVAL; | 775 | return -EINVAL; |
773 | return kvm_set_memory_region(kvm, mem, user_alloc); | 776 | return kvm_set_memory_region(kvm, mem, user_alloc); |
774 | } | 777 | } |
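This handler is reached from the KVM_SET_USER_MEMORY_REGION vm ioctl. A hedged userspace sketch of a caller that would exercise the sanity checks above (vm_fd, mem_size and the guest physical address are placeholders, not taken from this commit):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>

	int vm_fd;			/* fd returned by the KVM_CREATE_VM ioctl */
	size_t mem_size = 2 << 20;	/* 2 MiB, page aligned */
	void *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.flags           = 0,			/* or KVM_MEM_LOG_DIRTY_PAGES */
		.guest_phys_addr = 0x100000,		/* page aligned */
		.memory_size     = mem_size,		/* page aligned */
		.userspace_addr  = (unsigned long)mem,	/* page aligned */
	};
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
		perror("KVM_SET_USER_MEMORY_REGION");	/* e.g. EINVAL/EEXIST from the checks above */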
775 | 778 | ||
776 | int kvm_get_dirty_log(struct kvm *kvm, | 779 | int kvm_get_dirty_log(struct kvm *kvm, |
777 | struct kvm_dirty_log *log, int *is_dirty) | 780 | struct kvm_dirty_log *log, int *is_dirty) |
778 | { | 781 | { |
779 | struct kvm_memory_slot *memslot; | 782 | struct kvm_memory_slot *memslot; |
780 | int r, i; | 783 | int r, i; |
781 | unsigned long n; | 784 | unsigned long n; |
782 | unsigned long any = 0; | 785 | unsigned long any = 0; |
783 | 786 | ||
784 | r = -EINVAL; | 787 | r = -EINVAL; |
785 | if (log->slot >= KVM_MEMORY_SLOTS) | 788 | if (log->slot >= KVM_MEMORY_SLOTS) |
786 | goto out; | 789 | goto out; |
787 | 790 | ||
788 | memslot = &kvm->memslots->memslots[log->slot]; | 791 | memslot = &kvm->memslots->memslots[log->slot]; |
789 | r = -ENOENT; | 792 | r = -ENOENT; |
790 | if (!memslot->dirty_bitmap) | 793 | if (!memslot->dirty_bitmap) |
791 | goto out; | 794 | goto out; |
792 | 795 | ||
793 | n = kvm_dirty_bitmap_bytes(memslot); | 796 | n = kvm_dirty_bitmap_bytes(memslot); |
794 | 797 | ||
795 | for (i = 0; !any && i < n/sizeof(long); ++i) | 798 | for (i = 0; !any && i < n/sizeof(long); ++i) |
796 | any = memslot->dirty_bitmap[i]; | 799 | any = memslot->dirty_bitmap[i]; |
797 | 800 | ||
798 | r = -EFAULT; | 801 | r = -EFAULT; |
799 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | 802 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) |
800 | goto out; | 803 | goto out; |
801 | 804 | ||
802 | if (any) | 805 | if (any) |
803 | *is_dirty = 1; | 806 | *is_dirty = 1; |
804 | 807 | ||
805 | r = 0; | 808 | r = 0; |
806 | out: | 809 | out: |
807 | return r; | 810 | return r; |
808 | } | 811 | } |
809 | 812 | ||
810 | void kvm_disable_largepages(void) | 813 | void kvm_disable_largepages(void) |
811 | { | 814 | { |
812 | largepages_enabled = false; | 815 | largepages_enabled = false; |
813 | } | 816 | } |
814 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | 817 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); |
815 | 818 | ||
816 | int is_error_page(struct page *page) | 819 | int is_error_page(struct page *page) |
817 | { | 820 | { |
818 | return page == bad_page || page == hwpoison_page; | 821 | return page == bad_page || page == hwpoison_page || page == fault_page; |
819 | } | 822 | } |
820 | EXPORT_SYMBOL_GPL(is_error_page); | 823 | EXPORT_SYMBOL_GPL(is_error_page); |
821 | 824 | ||
822 | int is_error_pfn(pfn_t pfn) | 825 | int is_error_pfn(pfn_t pfn) |
823 | { | 826 | { |
824 | return pfn == bad_pfn || pfn == hwpoison_pfn; | 827 | return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; |
825 | } | 828 | } |
826 | EXPORT_SYMBOL_GPL(is_error_pfn); | 829 | EXPORT_SYMBOL_GPL(is_error_pfn); |
827 | 830 | ||
828 | int is_hwpoison_pfn(pfn_t pfn) | 831 | int is_hwpoison_pfn(pfn_t pfn) |
829 | { | 832 | { |
830 | return pfn == hwpoison_pfn; | 833 | return pfn == hwpoison_pfn; |
831 | } | 834 | } |
832 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); | 835 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); |
833 | 836 | ||
837 | int is_fault_pfn(pfn_t pfn) | ||
838 | { | ||
839 | return pfn == fault_pfn; | ||
840 | } | ||
841 | EXPORT_SYMBOL_GPL(is_fault_pfn); | ||
842 | |||
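fault_pfn/is_fault_pfn() are the new pieces this commit adds on the generic side: hva_to_pfn() below now returns fault_pfn instead of bad_page when an address inside a slot has no usable backing page, so arch code can fail the ioctl with -EFAULT rather than emulate the access as MMIO. A sketch of how a fault path might consume it (the function name here is illustrative; the real consumers are in the arch mmu files of this commit):

	static int handle_bad_pfn(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
	{
		kvm_release_pfn_clean(pfn);	/* drop the ref taken by gfn_to_pfn() */
		if (is_hwpoison_pfn(pfn))
			return 0;		/* hwpoisoned page: handled via SIGBUS, not MMIO */
		if (is_fault_pfn(pfn))
			return -EFAULT;		/* unbacked or read-only area: fail the ioctl */
		return 1;			/* genuine MMIO: go on to emulation */
	}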
834 | static inline unsigned long bad_hva(void) | 843 | static inline unsigned long bad_hva(void) |
835 | { | 844 | { |
836 | return PAGE_OFFSET; | 845 | return PAGE_OFFSET; |
837 | } | 846 | } |
838 | 847 | ||
839 | int kvm_is_error_hva(unsigned long addr) | 848 | int kvm_is_error_hva(unsigned long addr) |
840 | { | 849 | { |
841 | return addr == bad_hva(); | 850 | return addr == bad_hva(); |
842 | } | 851 | } |
843 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | 852 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); |
844 | 853 | ||
845 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 854 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
846 | { | 855 | { |
847 | int i; | 856 | int i; |
848 | struct kvm_memslots *slots = kvm_memslots(kvm); | 857 | struct kvm_memslots *slots = kvm_memslots(kvm); |
849 | 858 | ||
850 | for (i = 0; i < slots->nmemslots; ++i) { | 859 | for (i = 0; i < slots->nmemslots; ++i) { |
851 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 860 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
852 | 861 | ||
853 | if (gfn >= memslot->base_gfn | 862 | if (gfn >= memslot->base_gfn |
854 | && gfn < memslot->base_gfn + memslot->npages) | 863 | && gfn < memslot->base_gfn + memslot->npages) |
855 | return memslot; | 864 | return memslot; |
856 | } | 865 | } |
857 | return NULL; | 866 | return NULL; |
858 | } | 867 | } |
859 | EXPORT_SYMBOL_GPL(gfn_to_memslot); | 868 | EXPORT_SYMBOL_GPL(gfn_to_memslot); |
860 | 869 | ||
861 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 870 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
862 | { | 871 | { |
863 | int i; | 872 | int i; |
864 | struct kvm_memslots *slots = kvm_memslots(kvm); | 873 | struct kvm_memslots *slots = kvm_memslots(kvm); |
865 | 874 | ||
866 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 875 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
867 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 876 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
868 | 877 | ||
869 | if (memslot->flags & KVM_MEMSLOT_INVALID) | 878 | if (memslot->flags & KVM_MEMSLOT_INVALID) |
870 | continue; | 879 | continue; |
871 | 880 | ||
872 | if (gfn >= memslot->base_gfn | 881 | if (gfn >= memslot->base_gfn |
873 | && gfn < memslot->base_gfn + memslot->npages) | 882 | && gfn < memslot->base_gfn + memslot->npages) |
874 | return 1; | 883 | return 1; |
875 | } | 884 | } |
876 | return 0; | 885 | return 0; |
877 | } | 886 | } |
878 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 887 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
879 | 888 | ||
880 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) | 889 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) |
881 | { | 890 | { |
882 | struct vm_area_struct *vma; | 891 | struct vm_area_struct *vma; |
883 | unsigned long addr, size; | 892 | unsigned long addr, size; |
884 | 893 | ||
885 | size = PAGE_SIZE; | 894 | size = PAGE_SIZE; |
886 | 895 | ||
887 | addr = gfn_to_hva(kvm, gfn); | 896 | addr = gfn_to_hva(kvm, gfn); |
888 | if (kvm_is_error_hva(addr)) | 897 | if (kvm_is_error_hva(addr)) |
889 | return PAGE_SIZE; | 898 | return PAGE_SIZE; |
890 | 899 | ||
891 | down_read(¤t->mm->mmap_sem); | 900 | down_read(¤t->mm->mmap_sem); |
892 | vma = find_vma(current->mm, addr); | 901 | vma = find_vma(current->mm, addr); |
893 | if (!vma) | 902 | if (!vma) |
894 | goto out; | 903 | goto out; |
895 | 904 | ||
896 | size = vma_kernel_pagesize(vma); | 905 | size = vma_kernel_pagesize(vma); |
897 | 906 | ||
898 | out: | 907 | out: |
899 | up_read(¤t->mm->mmap_sem); | 908 | up_read(¤t->mm->mmap_sem); |
900 | 909 | ||
901 | return size; | 910 | return size; |
902 | } | 911 | } |
903 | 912 | ||
904 | int memslot_id(struct kvm *kvm, gfn_t gfn) | 913 | int memslot_id(struct kvm *kvm, gfn_t gfn) |
905 | { | 914 | { |
906 | int i; | 915 | int i; |
907 | struct kvm_memslots *slots = kvm_memslots(kvm); | 916 | struct kvm_memslots *slots = kvm_memslots(kvm); |
908 | struct kvm_memory_slot *memslot = NULL; | 917 | struct kvm_memory_slot *memslot = NULL; |
909 | 918 | ||
910 | for (i = 0; i < slots->nmemslots; ++i) { | 919 | for (i = 0; i < slots->nmemslots; ++i) { |
911 | memslot = &slots->memslots[i]; | 920 | memslot = &slots->memslots[i]; |
912 | 921 | ||
913 | if (gfn >= memslot->base_gfn | 922 | if (gfn >= memslot->base_gfn |
914 | && gfn < memslot->base_gfn + memslot->npages) | 923 | && gfn < memslot->base_gfn + memslot->npages) |
915 | break; | 924 | break; |
916 | } | 925 | } |
917 | 926 | ||
918 | return memslot - slots->memslots; | 927 | return memslot - slots->memslots; |
919 | } | 928 | } |
920 | 929 | ||
921 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | 930 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) |
922 | { | 931 | { |
923 | return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; | 932 | return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; |
924 | } | 933 | } |
925 | 934 | ||
926 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 935 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
927 | { | 936 | { |
928 | struct kvm_memory_slot *slot; | 937 | struct kvm_memory_slot *slot; |
929 | 938 | ||
930 | slot = gfn_to_memslot(kvm, gfn); | 939 | slot = gfn_to_memslot(kvm, gfn); |
931 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 940 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
932 | return bad_hva(); | 941 | return bad_hva(); |
933 | return gfn_to_hva_memslot(slot, gfn); | 942 | return gfn_to_hva_memslot(slot, gfn); |
934 | } | 943 | } |
935 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 944 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
936 | 945 | ||
937 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) | 946 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) |
938 | { | 947 | { |
939 | struct page *page[1]; | 948 | struct page *page[1]; |
940 | int npages; | 949 | int npages; |
941 | pfn_t pfn; | 950 | pfn_t pfn; |
942 | 951 | ||
943 | might_sleep(); | 952 | might_sleep(); |
944 | 953 | ||
945 | npages = get_user_pages_fast(addr, 1, 1, page); | 954 | npages = get_user_pages_fast(addr, 1, 1, page); |
946 | 955 | ||
947 | if (unlikely(npages != 1)) { | 956 | if (unlikely(npages != 1)) { |
948 | struct vm_area_struct *vma; | 957 | struct vm_area_struct *vma; |
949 | 958 | ||
950 | down_read(¤t->mm->mmap_sem); | 959 | down_read(¤t->mm->mmap_sem); |
951 | if (is_hwpoison_address(addr)) { | 960 | if (is_hwpoison_address(addr)) { |
952 | up_read(¤t->mm->mmap_sem); | 961 | up_read(¤t->mm->mmap_sem); |
953 | get_page(hwpoison_page); | 962 | get_page(hwpoison_page); |
954 | return page_to_pfn(hwpoison_page); | 963 | return page_to_pfn(hwpoison_page); |
955 | } | 964 | } |
956 | 965 | ||
957 | vma = find_vma(current->mm, addr); | 966 | vma = find_vma(current->mm, addr); |
958 | 967 | ||
959 | if (vma == NULL || addr < vma->vm_start || | 968 | if (vma == NULL || addr < vma->vm_start || |
960 | !(vma->vm_flags & VM_PFNMAP)) { | 969 | !(vma->vm_flags & VM_PFNMAP)) { |
961 | up_read(¤t->mm->mmap_sem); | 970 | up_read(¤t->mm->mmap_sem); |
962 | get_page(bad_page); | 971 | get_page(fault_page); |
963 | return page_to_pfn(bad_page); | 972 | return page_to_pfn(fault_page); |
964 | } | 973 | } |
965 | 974 | ||
966 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 975 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
967 | up_read(¤t->mm->mmap_sem); | 976 | up_read(¤t->mm->mmap_sem); |
968 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | 977 | BUG_ON(!kvm_is_mmio_pfn(pfn)); |
969 | } else | 978 | } else |
970 | pfn = page_to_pfn(page[0]); | 979 | pfn = page_to_pfn(page[0]); |
971 | 980 | ||
972 | return pfn; | 981 | return pfn; |
973 | } | 982 | } |
974 | 983 | ||
975 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | 984 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) |
976 | { | 985 | { |
977 | unsigned long addr; | 986 | unsigned long addr; |
978 | 987 | ||
979 | addr = gfn_to_hva(kvm, gfn); | 988 | addr = gfn_to_hva(kvm, gfn); |
980 | if (kvm_is_error_hva(addr)) { | 989 | if (kvm_is_error_hva(addr)) { |
981 | get_page(bad_page); | 990 | get_page(bad_page); |
982 | return page_to_pfn(bad_page); | 991 | return page_to_pfn(bad_page); |
983 | } | 992 | } |
984 | 993 | ||
985 | return hva_to_pfn(kvm, addr); | 994 | return hva_to_pfn(kvm, addr); |
986 | } | 995 | } |
987 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 996 | EXPORT_SYMBOL_GPL(gfn_to_pfn); |
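With fault_pfn folded into is_error_pfn() above, callers of gfn_to_pfn() keep a single pattern: check the result, drop the reference, and propagate -EFAULT instead of treating the slot-backed-but-unmapped case as MMIO. A short sketch (kvm and gfn assumed in scope):

	pfn_t pfn = gfn_to_pfn(kvm, gfn);

	if (is_error_pfn(pfn)) {
		kvm_release_pfn_clean(pfn);	/* error pages are reference-counted too */
		return -EFAULT;
	}
	/* ... use pfn_to_page(pfn) or map the pfn ... */
	kvm_release_pfn_clean(pfn);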
988 | 997 | ||
989 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 998 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
990 | struct kvm_memory_slot *slot, gfn_t gfn) | 999 | struct kvm_memory_slot *slot, gfn_t gfn) |
991 | { | 1000 | { |
992 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | 1001 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); |
993 | return hva_to_pfn(kvm, addr); | 1002 | return hva_to_pfn(kvm, addr); |
994 | } | 1003 | } |
995 | 1004 | ||
996 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 1005 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
997 | { | 1006 | { |
998 | pfn_t pfn; | 1007 | pfn_t pfn; |
999 | 1008 | ||
1000 | pfn = gfn_to_pfn(kvm, gfn); | 1009 | pfn = gfn_to_pfn(kvm, gfn); |
1001 | if (!kvm_is_mmio_pfn(pfn)) | 1010 | if (!kvm_is_mmio_pfn(pfn)) |
1002 | return pfn_to_page(pfn); | 1011 | return pfn_to_page(pfn); |
1003 | 1012 | ||
1004 | WARN_ON(kvm_is_mmio_pfn(pfn)); | 1013 | WARN_ON(kvm_is_mmio_pfn(pfn)); |
1005 | 1014 | ||
1006 | get_page(bad_page); | 1015 | get_page(bad_page); |
1007 | return bad_page; | 1016 | return bad_page; |
1008 | } | 1017 | } |
1009 | 1018 | ||
1010 | EXPORT_SYMBOL_GPL(gfn_to_page); | 1019 | EXPORT_SYMBOL_GPL(gfn_to_page); |
1011 | 1020 | ||
1012 | void kvm_release_page_clean(struct page *page) | 1021 | void kvm_release_page_clean(struct page *page) |
1013 | { | 1022 | { |
1014 | kvm_release_pfn_clean(page_to_pfn(page)); | 1023 | kvm_release_pfn_clean(page_to_pfn(page)); |
1015 | } | 1024 | } |
1016 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); | 1025 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); |
1017 | 1026 | ||
1018 | void kvm_release_pfn_clean(pfn_t pfn) | 1027 | void kvm_release_pfn_clean(pfn_t pfn) |
1019 | { | 1028 | { |
1020 | if (!kvm_is_mmio_pfn(pfn)) | 1029 | if (!kvm_is_mmio_pfn(pfn)) |
1021 | put_page(pfn_to_page(pfn)); | 1030 | put_page(pfn_to_page(pfn)); |
1022 | } | 1031 | } |
1023 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | 1032 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); |
1024 | 1033 | ||
1025 | void kvm_release_page_dirty(struct page *page) | 1034 | void kvm_release_page_dirty(struct page *page) |
1026 | { | 1035 | { |
1027 | kvm_release_pfn_dirty(page_to_pfn(page)); | 1036 | kvm_release_pfn_dirty(page_to_pfn(page)); |
1028 | } | 1037 | } |
1029 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | 1038 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); |
1030 | 1039 | ||
1031 | void kvm_release_pfn_dirty(pfn_t pfn) | 1040 | void kvm_release_pfn_dirty(pfn_t pfn) |
1032 | { | 1041 | { |
1033 | kvm_set_pfn_dirty(pfn); | 1042 | kvm_set_pfn_dirty(pfn); |
1034 | kvm_release_pfn_clean(pfn); | 1043 | kvm_release_pfn_clean(pfn); |
1035 | } | 1044 | } |
1036 | EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); | 1045 | EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); |
1037 | 1046 | ||
1038 | void kvm_set_page_dirty(struct page *page) | 1047 | void kvm_set_page_dirty(struct page *page) |
1039 | { | 1048 | { |
1040 | kvm_set_pfn_dirty(page_to_pfn(page)); | 1049 | kvm_set_pfn_dirty(page_to_pfn(page)); |
1041 | } | 1050 | } |
1042 | EXPORT_SYMBOL_GPL(kvm_set_page_dirty); | 1051 | EXPORT_SYMBOL_GPL(kvm_set_page_dirty); |
1043 | 1052 | ||
1044 | void kvm_set_pfn_dirty(pfn_t pfn) | 1053 | void kvm_set_pfn_dirty(pfn_t pfn) |
1045 | { | 1054 | { |
1046 | if (!kvm_is_mmio_pfn(pfn)) { | 1055 | if (!kvm_is_mmio_pfn(pfn)) { |
1047 | struct page *page = pfn_to_page(pfn); | 1056 | struct page *page = pfn_to_page(pfn); |
1048 | if (!PageReserved(page)) | 1057 | if (!PageReserved(page)) |
1049 | SetPageDirty(page); | 1058 | SetPageDirty(page); |
1050 | } | 1059 | } |
1051 | } | 1060 | } |
1052 | EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); | 1061 | EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); |
1053 | 1062 | ||
1054 | void kvm_set_pfn_accessed(pfn_t pfn) | 1063 | void kvm_set_pfn_accessed(pfn_t pfn) |
1055 | { | 1064 | { |
1056 | if (!kvm_is_mmio_pfn(pfn)) | 1065 | if (!kvm_is_mmio_pfn(pfn)) |
1057 | mark_page_accessed(pfn_to_page(pfn)); | 1066 | mark_page_accessed(pfn_to_page(pfn)); |
1058 | } | 1067 | } |
1059 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); | 1068 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); |
1060 | 1069 | ||
1061 | void kvm_get_pfn(pfn_t pfn) | 1070 | void kvm_get_pfn(pfn_t pfn) |
1062 | { | 1071 | { |
1063 | if (!kvm_is_mmio_pfn(pfn)) | 1072 | if (!kvm_is_mmio_pfn(pfn)) |
1064 | get_page(pfn_to_page(pfn)); | 1073 | get_page(pfn_to_page(pfn)); |
1065 | } | 1074 | } |
1066 | EXPORT_SYMBOL_GPL(kvm_get_pfn); | 1075 | EXPORT_SYMBOL_GPL(kvm_get_pfn); |
1067 | 1076 | ||
1068 | static int next_segment(unsigned long len, int offset) | 1077 | static int next_segment(unsigned long len, int offset) |
1069 | { | 1078 | { |
1070 | if (len > PAGE_SIZE - offset) | 1079 | if (len > PAGE_SIZE - offset) |
1071 | return PAGE_SIZE - offset; | 1080 | return PAGE_SIZE - offset; |
1072 | else | 1081 | else |
1073 | return len; | 1082 | return len; |
1074 | } | 1083 | } |
1075 | 1084 | ||
1076 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | 1085 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, |
1077 | int len) | 1086 | int len) |
1078 | { | 1087 | { |
1079 | int r; | 1088 | int r; |
1080 | unsigned long addr; | 1089 | unsigned long addr; |
1081 | 1090 | ||
1082 | addr = gfn_to_hva(kvm, gfn); | 1091 | addr = gfn_to_hva(kvm, gfn); |
1083 | if (kvm_is_error_hva(addr)) | 1092 | if (kvm_is_error_hva(addr)) |
1084 | return -EFAULT; | 1093 | return -EFAULT; |
1085 | r = copy_from_user(data, (void __user *)addr + offset, len); | 1094 | r = copy_from_user(data, (void __user *)addr + offset, len); |
1086 | if (r) | 1095 | if (r) |
1087 | return -EFAULT; | 1096 | return -EFAULT; |
1088 | return 0; | 1097 | return 0; |
1089 | } | 1098 | } |
1090 | EXPORT_SYMBOL_GPL(kvm_read_guest_page); | 1099 | EXPORT_SYMBOL_GPL(kvm_read_guest_page); |
1091 | 1100 | ||
1092 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) | 1101 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) |
1093 | { | 1102 | { |
1094 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1103 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1095 | int seg; | 1104 | int seg; |
1096 | int offset = offset_in_page(gpa); | 1105 | int offset = offset_in_page(gpa); |
1097 | int ret; | 1106 | int ret; |
1098 | 1107 | ||
1099 | while ((seg = next_segment(len, offset)) != 0) { | 1108 | while ((seg = next_segment(len, offset)) != 0) { |
1100 | ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); | 1109 | ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); |
1101 | if (ret < 0) | 1110 | if (ret < 0) |
1102 | return ret; | 1111 | return ret; |
1103 | offset = 0; | 1112 | offset = 0; |
1104 | len -= seg; | 1113 | len -= seg; |
1105 | data += seg; | 1114 | data += seg; |
1106 | ++gfn; | 1115 | ++gfn; |
1107 | } | 1116 | } |
1108 | return 0; | 1117 | return 0; |
1109 | } | 1118 | } |
1110 | EXPORT_SYMBOL_GPL(kvm_read_guest); | 1119 | EXPORT_SYMBOL_GPL(kvm_read_guest); |
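kvm_read_guest() simply iterates kvm_read_guest_page() across page boundaries, so a caller copying a guest-physical structure needs one call and one -EFAULT check. A minimal sketch (the structure name is hypothetical):

	struct guest_desc desc;		/* hypothetical guest-resident structure */

	if (kvm_read_guest(kvm, gpa, &desc, sizeof(desc)))
		return -EFAULT;		/* gpa outside any memslot, or the copy faulted */
	/* ... validate and use desc ... */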
1111 | 1120 | ||
1112 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | 1121 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, |
1113 | unsigned long len) | 1122 | unsigned long len) |
1114 | { | 1123 | { |
1115 | int r; | 1124 | int r; |
1116 | unsigned long addr; | 1125 | unsigned long addr; |
1117 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1126 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1118 | int offset = offset_in_page(gpa); | 1127 | int offset = offset_in_page(gpa); |
1119 | 1128 | ||
1120 | addr = gfn_to_hva(kvm, gfn); | 1129 | addr = gfn_to_hva(kvm, gfn); |
1121 | if (kvm_is_error_hva(addr)) | 1130 | if (kvm_is_error_hva(addr)) |
1122 | return -EFAULT; | 1131 | return -EFAULT; |
1123 | pagefault_disable(); | 1132 | pagefault_disable(); |
1124 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); | 1133 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); |
1125 | pagefault_enable(); | 1134 | pagefault_enable(); |
1126 | if (r) | 1135 | if (r) |
1127 | return -EFAULT; | 1136 | return -EFAULT; |
1128 | return 0; | 1137 | return 0; |
1129 | } | 1138 | } |
1130 | EXPORT_SYMBOL(kvm_read_guest_atomic); | 1139 | EXPORT_SYMBOL(kvm_read_guest_atomic); |
1131 | 1140 | ||
1132 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, | 1141 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, |
1133 | int offset, int len) | 1142 | int offset, int len) |
1134 | { | 1143 | { |
1135 | int r; | 1144 | int r; |
1136 | unsigned long addr; | 1145 | unsigned long addr; |
1137 | 1146 | ||
1138 | addr = gfn_to_hva(kvm, gfn); | 1147 | addr = gfn_to_hva(kvm, gfn); |
1139 | if (kvm_is_error_hva(addr)) | 1148 | if (kvm_is_error_hva(addr)) |
1140 | return -EFAULT; | 1149 | return -EFAULT; |
1141 | r = copy_to_user((void __user *)addr + offset, data, len); | 1150 | r = copy_to_user((void __user *)addr + offset, data, len); |
1142 | if (r) | 1151 | if (r) |
1143 | return -EFAULT; | 1152 | return -EFAULT; |
1144 | mark_page_dirty(kvm, gfn); | 1153 | mark_page_dirty(kvm, gfn); |
1145 | return 0; | 1154 | return 0; |
1146 | } | 1155 | } |
1147 | EXPORT_SYMBOL_GPL(kvm_write_guest_page); | 1156 | EXPORT_SYMBOL_GPL(kvm_write_guest_page); |
1148 | 1157 | ||
1149 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | 1158 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, |
1150 | unsigned long len) | 1159 | unsigned long len) |
1151 | { | 1160 | { |
1152 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1161 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1153 | int seg; | 1162 | int seg; |
1154 | int offset = offset_in_page(gpa); | 1163 | int offset = offset_in_page(gpa); |
1155 | int ret; | 1164 | int ret; |
1156 | 1165 | ||
1157 | while ((seg = next_segment(len, offset)) != 0) { | 1166 | while ((seg = next_segment(len, offset)) != 0) { |
1158 | ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); | 1167 | ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); |
1159 | if (ret < 0) | 1168 | if (ret < 0) |
1160 | return ret; | 1169 | return ret; |
1161 | offset = 0; | 1170 | offset = 0; |
1162 | len -= seg; | 1171 | len -= seg; |
1163 | data += seg; | 1172 | data += seg; |
1164 | ++gfn; | 1173 | ++gfn; |
1165 | } | 1174 | } |
1166 | return 0; | 1175 | return 0; |
1167 | } | 1176 | } |
1168 | 1177 | ||
1169 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) | 1178 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) |
1170 | { | 1179 | { |
1171 | return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); | 1180 | return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); |
1172 | } | 1181 | } |
1173 | EXPORT_SYMBOL_GPL(kvm_clear_guest_page); | 1182 | EXPORT_SYMBOL_GPL(kvm_clear_guest_page); |
1174 | 1183 | ||
1175 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) | 1184 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) |
1176 | { | 1185 | { |
1177 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1186 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1178 | int seg; | 1187 | int seg; |
1179 | int offset = offset_in_page(gpa); | 1188 | int offset = offset_in_page(gpa); |
1180 | int ret; | 1189 | int ret; |
1181 | 1190 | ||
1182 | while ((seg = next_segment(len, offset)) != 0) { | 1191 | while ((seg = next_segment(len, offset)) != 0) { |
1183 | ret = kvm_clear_guest_page(kvm, gfn, offset, seg); | 1192 | ret = kvm_clear_guest_page(kvm, gfn, offset, seg); |
1184 | if (ret < 0) | 1193 | if (ret < 0) |
1185 | return ret; | 1194 | return ret; |
1186 | offset = 0; | 1195 | offset = 0; |
1187 | len -= seg; | 1196 | len -= seg; |
1188 | ++gfn; | 1197 | ++gfn; |
1189 | } | 1198 | } |
1190 | return 0; | 1199 | return 0; |
1191 | } | 1200 | } |
1192 | EXPORT_SYMBOL_GPL(kvm_clear_guest); | 1201 | EXPORT_SYMBOL_GPL(kvm_clear_guest); |
1193 | 1202 | ||
1194 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | 1203 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn) |
1195 | { | 1204 | { |
1196 | struct kvm_memory_slot *memslot; | 1205 | struct kvm_memory_slot *memslot; |
1197 | 1206 | ||
1198 | memslot = gfn_to_memslot(kvm, gfn); | 1207 | memslot = gfn_to_memslot(kvm, gfn); |
1199 | if (memslot && memslot->dirty_bitmap) { | 1208 | if (memslot && memslot->dirty_bitmap) { |
1200 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1209 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1201 | 1210 | ||
1202 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); | 1211 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); |
1203 | } | 1212 | } |
1204 | } | 1213 | } |
1205 | 1214 | ||
1206 | /* | 1215 | /* |
1207 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. | 1216 | * The vCPU has executed a HLT instruction with in-kernel mode enabled. |
1208 | */ | 1217 | */ |
1209 | void kvm_vcpu_block(struct kvm_vcpu *vcpu) | 1218 | void kvm_vcpu_block(struct kvm_vcpu *vcpu) |
1210 | { | 1219 | { |
1211 | DEFINE_WAIT(wait); | 1220 | DEFINE_WAIT(wait); |
1212 | 1221 | ||
1213 | for (;;) { | 1222 | for (;;) { |
1214 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1223 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
1215 | 1224 | ||
1216 | if (kvm_arch_vcpu_runnable(vcpu)) { | 1225 | if (kvm_arch_vcpu_runnable(vcpu)) { |
1217 | kvm_make_request(KVM_REQ_UNHALT, vcpu); | 1226 | kvm_make_request(KVM_REQ_UNHALT, vcpu); |
1218 | break; | 1227 | break; |
1219 | } | 1228 | } |
1220 | if (kvm_cpu_has_pending_timer(vcpu)) | 1229 | if (kvm_cpu_has_pending_timer(vcpu)) |
1221 | break; | 1230 | break; |
1222 | if (signal_pending(current)) | 1231 | if (signal_pending(current)) |
1223 | break; | 1232 | break; |
1224 | 1233 | ||
1225 | schedule(); | 1234 | schedule(); |
1226 | } | 1235 | } |
1227 | 1236 | ||
1228 | finish_wait(&vcpu->wq, &wait); | 1237 | finish_wait(&vcpu->wq, &wait); |
1229 | } | 1238 | } |
1230 | 1239 | ||
1231 | void kvm_resched(struct kvm_vcpu *vcpu) | 1240 | void kvm_resched(struct kvm_vcpu *vcpu) |
1232 | { | 1241 | { |
1233 | if (!need_resched()) | 1242 | if (!need_resched()) |
1234 | return; | 1243 | return; |
1235 | cond_resched(); | 1244 | cond_resched(); |
1236 | } | 1245 | } |
1237 | EXPORT_SYMBOL_GPL(kvm_resched); | 1246 | EXPORT_SYMBOL_GPL(kvm_resched); |
1238 | 1247 | ||
1239 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | 1248 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) |
1240 | { | 1249 | { |
1241 | ktime_t expires; | 1250 | ktime_t expires; |
1242 | DEFINE_WAIT(wait); | 1251 | DEFINE_WAIT(wait); |
1243 | 1252 | ||
1244 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1253 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
1245 | 1254 | ||
1246 | /* Sleep for 100 us, and hope the lock holder gets scheduled */ | 1255 | /* Sleep for 100 us, and hope the lock holder gets scheduled */ |
1247 | expires = ktime_add_ns(ktime_get(), 100000UL); | 1256 | expires = ktime_add_ns(ktime_get(), 100000UL); |
1248 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | 1257 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); |
1249 | 1258 | ||
1250 | finish_wait(&vcpu->wq, &wait); | 1259 | finish_wait(&vcpu->wq, &wait); |
1251 | } | 1260 | } |
1252 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | 1261 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); |
1253 | 1262 | ||
1254 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1263 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1255 | { | 1264 | { |
1256 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | 1265 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; |
1257 | struct page *page; | 1266 | struct page *page; |
1258 | 1267 | ||
1259 | if (vmf->pgoff == 0) | 1268 | if (vmf->pgoff == 0) |
1260 | page = virt_to_page(vcpu->run); | 1269 | page = virt_to_page(vcpu->run); |
1261 | #ifdef CONFIG_X86 | 1270 | #ifdef CONFIG_X86 |
1262 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) | 1271 | else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) |
1263 | page = virt_to_page(vcpu->arch.pio_data); | 1272 | page = virt_to_page(vcpu->arch.pio_data); |
1264 | #endif | 1273 | #endif |
1265 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 1274 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
1266 | else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) | 1275 | else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) |
1267 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); | 1276 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); |
1268 | #endif | 1277 | #endif |
1269 | else | 1278 | else |
1270 | return VM_FAULT_SIGBUS; | 1279 | return VM_FAULT_SIGBUS; |
1271 | get_page(page); | 1280 | get_page(page); |
1272 | vmf->page = page; | 1281 | vmf->page = page; |
1273 | return 0; | 1282 | return 0; |
1274 | } | 1283 | } |
1275 | 1284 | ||
1276 | static const struct vm_operations_struct kvm_vcpu_vm_ops = { | 1285 | static const struct vm_operations_struct kvm_vcpu_vm_ops = { |
1277 | .fault = kvm_vcpu_fault, | 1286 | .fault = kvm_vcpu_fault, |
1278 | }; | 1287 | }; |
1279 | 1288 | ||
1280 | static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) | 1289 | static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) |
1281 | { | 1290 | { |
1282 | vma->vm_ops = &kvm_vcpu_vm_ops; | 1291 | vma->vm_ops = &kvm_vcpu_vm_ops; |
1283 | return 0; | 1292 | return 0; |
1284 | } | 1293 | } |
1285 | 1294 | ||
1286 | static int kvm_vcpu_release(struct inode *inode, struct file *filp) | 1295 | static int kvm_vcpu_release(struct inode *inode, struct file *filp) |
1287 | { | 1296 | { |
1288 | struct kvm_vcpu *vcpu = filp->private_data; | 1297 | struct kvm_vcpu *vcpu = filp->private_data; |
1289 | 1298 | ||
1290 | kvm_put_kvm(vcpu->kvm); | 1299 | kvm_put_kvm(vcpu->kvm); |
1291 | return 0; | 1300 | return 0; |
1292 | } | 1301 | } |
1293 | 1302 | ||
1294 | static struct file_operations kvm_vcpu_fops = { | 1303 | static struct file_operations kvm_vcpu_fops = { |
1295 | .release = kvm_vcpu_release, | 1304 | .release = kvm_vcpu_release, |
1296 | .unlocked_ioctl = kvm_vcpu_ioctl, | 1305 | .unlocked_ioctl = kvm_vcpu_ioctl, |
1297 | .compat_ioctl = kvm_vcpu_ioctl, | 1306 | .compat_ioctl = kvm_vcpu_ioctl, |
1298 | .mmap = kvm_vcpu_mmap, | 1307 | .mmap = kvm_vcpu_mmap, |
1299 | }; | 1308 | }; |
1300 | 1309 | ||
1301 | /* | 1310 | /* |
1302 | * Allocates an inode for the vcpu. | 1311 | * Allocates an inode for the vcpu. |
1303 | */ | 1312 | */ |
1304 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) | 1313 | static int create_vcpu_fd(struct kvm_vcpu *vcpu) |
1305 | { | 1314 | { |
1306 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); | 1315 | return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); |
1307 | } | 1316 | } |
1308 | 1317 | ||
1309 | /* | 1318 | /* |
1310 | * Creates some virtual cpus. Good luck creating more than one. | 1319 | * Creates some virtual cpus. Good luck creating more than one. |
1311 | */ | 1320 | */ |
1312 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) | 1321 | static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) |
1313 | { | 1322 | { |
1314 | int r; | 1323 | int r; |
1315 | struct kvm_vcpu *vcpu, *v; | 1324 | struct kvm_vcpu *vcpu, *v; |
1316 | 1325 | ||
1317 | vcpu = kvm_arch_vcpu_create(kvm, id); | 1326 | vcpu = kvm_arch_vcpu_create(kvm, id); |
1318 | if (IS_ERR(vcpu)) | 1327 | if (IS_ERR(vcpu)) |
1319 | return PTR_ERR(vcpu); | 1328 | return PTR_ERR(vcpu); |
1320 | 1329 | ||
1321 | preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); | 1330 | preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); |
1322 | 1331 | ||
1323 | r = kvm_arch_vcpu_setup(vcpu); | 1332 | r = kvm_arch_vcpu_setup(vcpu); |
1324 | if (r) | 1333 | if (r) |
1325 | return r; | 1334 | return r; |
1326 | 1335 | ||
1327 | mutex_lock(&kvm->lock); | 1336 | mutex_lock(&kvm->lock); |
1328 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { | 1337 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { |
1329 | r = -EINVAL; | 1338 | r = -EINVAL; |
1330 | goto vcpu_destroy; | 1339 | goto vcpu_destroy; |
1331 | } | 1340 | } |
1332 | 1341 | ||
1333 | kvm_for_each_vcpu(r, v, kvm) | 1342 | kvm_for_each_vcpu(r, v, kvm) |
1334 | if (v->vcpu_id == id) { | 1343 | if (v->vcpu_id == id) { |
1335 | r = -EEXIST; | 1344 | r = -EEXIST; |
1336 | goto vcpu_destroy; | 1345 | goto vcpu_destroy; |
1337 | } | 1346 | } |
1338 | 1347 | ||
1339 | BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); | 1348 | BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); |
1340 | 1349 | ||
1341 | /* Now it's all set up, let userspace reach it */ | 1350 | /* Now it's all set up, let userspace reach it */ |
1342 | kvm_get_kvm(kvm); | 1351 | kvm_get_kvm(kvm); |
1343 | r = create_vcpu_fd(vcpu); | 1352 | r = create_vcpu_fd(vcpu); |
1344 | if (r < 0) { | 1353 | if (r < 0) { |
1345 | kvm_put_kvm(kvm); | 1354 | kvm_put_kvm(kvm); |
1346 | goto vcpu_destroy; | 1355 | goto vcpu_destroy; |
1347 | } | 1356 | } |
1348 | 1357 | ||
1349 | kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; | 1358 | kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; |
1350 | smp_wmb(); | 1359 | smp_wmb(); |
1351 | atomic_inc(&kvm->online_vcpus); | 1360 | atomic_inc(&kvm->online_vcpus); |
1352 | 1361 | ||
1353 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 1362 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
1354 | if (kvm->bsp_vcpu_id == id) | 1363 | if (kvm->bsp_vcpu_id == id) |
1355 | kvm->bsp_vcpu = vcpu; | 1364 | kvm->bsp_vcpu = vcpu; |
1356 | #endif | 1365 | #endif |
1357 | mutex_unlock(&kvm->lock); | 1366 | mutex_unlock(&kvm->lock); |
1358 | return r; | 1367 | return r; |
1359 | 1368 | ||
1360 | vcpu_destroy: | 1369 | vcpu_destroy: |
1361 | mutex_unlock(&kvm->lock); | 1370 | mutex_unlock(&kvm->lock); |
1362 | kvm_arch_vcpu_destroy(vcpu); | 1371 | kvm_arch_vcpu_destroy(vcpu); |
1363 | return r; | 1372 | return r; |
1364 | } | 1373 | } |
1365 | 1374 | ||
1366 | static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | 1375 | static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) |
1367 | { | 1376 | { |
1368 | if (sigset) { | 1377 | if (sigset) { |
1369 | sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); | 1378 | sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); |
1370 | vcpu->sigset_active = 1; | 1379 | vcpu->sigset_active = 1; |
1371 | vcpu->sigset = *sigset; | 1380 | vcpu->sigset = *sigset; |
1372 | } else | 1381 | } else |
1373 | vcpu->sigset_active = 0; | 1382 | vcpu->sigset_active = 0; |
1374 | return 0; | 1383 | return 0; |
1375 | } | 1384 | } |
1376 | 1385 | ||
1377 | static long kvm_vcpu_ioctl(struct file *filp, | 1386 | static long kvm_vcpu_ioctl(struct file *filp, |
1378 | unsigned int ioctl, unsigned long arg) | 1387 | unsigned int ioctl, unsigned long arg) |
1379 | { | 1388 | { |
1380 | struct kvm_vcpu *vcpu = filp->private_data; | 1389 | struct kvm_vcpu *vcpu = filp->private_data; |
1381 | void __user *argp = (void __user *)arg; | 1390 | void __user *argp = (void __user *)arg; |
1382 | int r; | 1391 | int r; |
1383 | struct kvm_fpu *fpu = NULL; | 1392 | struct kvm_fpu *fpu = NULL; |
1384 | struct kvm_sregs *kvm_sregs = NULL; | 1393 | struct kvm_sregs *kvm_sregs = NULL; |
1385 | 1394 | ||
1386 | if (vcpu->kvm->mm != current->mm) | 1395 | if (vcpu->kvm->mm != current->mm) |
1387 | return -EIO; | 1396 | return -EIO; |
1388 | 1397 | ||
1389 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) | 1398 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) |
1390 | /* | 1399 | /* |
1391 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, | 1400 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, |
1392 | * so vcpu_load() would break it. | 1401 | * so vcpu_load() would break it. |
1393 | */ | 1402 | */ |
1394 | if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) | 1403 | if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) |
1395 | return kvm_arch_vcpu_ioctl(filp, ioctl, arg); | 1404 | return kvm_arch_vcpu_ioctl(filp, ioctl, arg); |
1396 | #endif | 1405 | #endif |
1397 | 1406 | ||
1398 | 1407 | ||
1399 | vcpu_load(vcpu); | 1408 | vcpu_load(vcpu); |
1400 | switch (ioctl) { | 1409 | switch (ioctl) { |
1401 | case KVM_RUN: | 1410 | case KVM_RUN: |
1402 | r = -EINVAL; | 1411 | r = -EINVAL; |
1403 | if (arg) | 1412 | if (arg) |
1404 | goto out; | 1413 | goto out; |
1405 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); | 1414 | r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); |
1406 | break; | 1415 | break; |
1407 | case KVM_GET_REGS: { | 1416 | case KVM_GET_REGS: { |
1408 | struct kvm_regs *kvm_regs; | 1417 | struct kvm_regs *kvm_regs; |
1409 | 1418 | ||
1410 | r = -ENOMEM; | 1419 | r = -ENOMEM; |
1411 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); | 1420 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
1412 | if (!kvm_regs) | 1421 | if (!kvm_regs) |
1413 | goto out; | 1422 | goto out; |
1414 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); | 1423 | r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); |
1415 | if (r) | 1424 | if (r) |
1416 | goto out_free1; | 1425 | goto out_free1; |
1417 | r = -EFAULT; | 1426 | r = -EFAULT; |
1418 | if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) | 1427 | if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) |
1419 | goto out_free1; | 1428 | goto out_free1; |
1420 | r = 0; | 1429 | r = 0; |
1421 | out_free1: | 1430 | out_free1: |
1422 | kfree(kvm_regs); | 1431 | kfree(kvm_regs); |
1423 | break; | 1432 | break; |
1424 | } | 1433 | } |
1425 | case KVM_SET_REGS: { | 1434 | case KVM_SET_REGS: { |
1426 | struct kvm_regs *kvm_regs; | 1435 | struct kvm_regs *kvm_regs; |
1427 | 1436 | ||
1428 | r = -ENOMEM; | 1437 | r = -ENOMEM; |
1429 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); | 1438 | kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); |
1430 | if (!kvm_regs) | 1439 | if (!kvm_regs) |
1431 | goto out; | 1440 | goto out; |
1432 | r = -EFAULT; | 1441 | r = -EFAULT; |
1433 | if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) | 1442 | if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) |
1434 | goto out_free2; | 1443 | goto out_free2; |
1435 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); | 1444 | r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); |
1436 | if (r) | 1445 | if (r) |
1437 | goto out_free2; | 1446 | goto out_free2; |
1438 | r = 0; | 1447 | r = 0; |
1439 | out_free2: | 1448 | out_free2: |
1440 | kfree(kvm_regs); | 1449 | kfree(kvm_regs); |
1441 | break; | 1450 | break; |
1442 | } | 1451 | } |
1443 | case KVM_GET_SREGS: { | 1452 | case KVM_GET_SREGS: { |
1444 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); | 1453 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); |
1445 | r = -ENOMEM; | 1454 | r = -ENOMEM; |
1446 | if (!kvm_sregs) | 1455 | if (!kvm_sregs) |
1447 | goto out; | 1456 | goto out; |
1448 | r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); | 1457 | r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); |
1449 | if (r) | 1458 | if (r) |
1450 | goto out; | 1459 | goto out; |
1451 | r = -EFAULT; | 1460 | r = -EFAULT; |
1452 | if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) | 1461 | if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) |
1453 | goto out; | 1462 | goto out; |
1454 | r = 0; | 1463 | r = 0; |
1455 | break; | 1464 | break; |
1456 | } | 1465 | } |
1457 | case KVM_SET_SREGS: { | 1466 | case KVM_SET_SREGS: { |
1458 | kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); | 1467 | kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); |
1459 | r = -ENOMEM; | 1468 | r = -ENOMEM; |
1460 | if (!kvm_sregs) | 1469 | if (!kvm_sregs) |
1461 | goto out; | 1470 | goto out; |
1462 | r = -EFAULT; | 1471 | r = -EFAULT; |
1463 | if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) | 1472 | if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) |
1464 | goto out; | 1473 | goto out; |
1465 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); | 1474 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); |
1466 | if (r) | 1475 | if (r) |
1467 | goto out; | 1476 | goto out; |
1468 | r = 0; | 1477 | r = 0; |
1469 | break; | 1478 | break; |
1470 | } | 1479 | } |
1471 | case KVM_GET_MP_STATE: { | 1480 | case KVM_GET_MP_STATE: { |
1472 | struct kvm_mp_state mp_state; | 1481 | struct kvm_mp_state mp_state; |
1473 | 1482 | ||
1474 | r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); | 1483 | r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); |
1475 | if (r) | 1484 | if (r) |
1476 | goto out; | 1485 | goto out; |
1477 | r = -EFAULT; | 1486 | r = -EFAULT; |
1478 | if (copy_to_user(argp, &mp_state, sizeof mp_state)) | 1487 | if (copy_to_user(argp, &mp_state, sizeof mp_state)) |
1479 | goto out; | 1488 | goto out; |
1480 | r = 0; | 1489 | r = 0; |
1481 | break; | 1490 | break; |
1482 | } | 1491 | } |
1483 | case KVM_SET_MP_STATE: { | 1492 | case KVM_SET_MP_STATE: { |
1484 | struct kvm_mp_state mp_state; | 1493 | struct kvm_mp_state mp_state; |
1485 | 1494 | ||
1486 | r = -EFAULT; | 1495 | r = -EFAULT; |
1487 | if (copy_from_user(&mp_state, argp, sizeof mp_state)) | 1496 | if (copy_from_user(&mp_state, argp, sizeof mp_state)) |
1488 | goto out; | 1497 | goto out; |
1489 | r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); | 1498 | r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); |
1490 | if (r) | 1499 | if (r) |
1491 | goto out; | 1500 | goto out; |
1492 | r = 0; | 1501 | r = 0; |
1493 | break; | 1502 | break; |
1494 | } | 1503 | } |
1495 | case KVM_TRANSLATE: { | 1504 | case KVM_TRANSLATE: { |
1496 | struct kvm_translation tr; | 1505 | struct kvm_translation tr; |
1497 | 1506 | ||
1498 | r = -EFAULT; | 1507 | r = -EFAULT; |
1499 | if (copy_from_user(&tr, argp, sizeof tr)) | 1508 | if (copy_from_user(&tr, argp, sizeof tr)) |
1500 | goto out; | 1509 | goto out; |
1501 | r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); | 1510 | r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); |
1502 | if (r) | 1511 | if (r) |
1503 | goto out; | 1512 | goto out; |
1504 | r = -EFAULT; | 1513 | r = -EFAULT; |
1505 | if (copy_to_user(argp, &tr, sizeof tr)) | 1514 | if (copy_to_user(argp, &tr, sizeof tr)) |
1506 | goto out; | 1515 | goto out; |
1507 | r = 0; | 1516 | r = 0; |
1508 | break; | 1517 | break; |
1509 | } | 1518 | } |
1510 | case KVM_SET_GUEST_DEBUG: { | 1519 | case KVM_SET_GUEST_DEBUG: { |
1511 | struct kvm_guest_debug dbg; | 1520 | struct kvm_guest_debug dbg; |
1512 | 1521 | ||
1513 | r = -EFAULT; | 1522 | r = -EFAULT; |
1514 | if (copy_from_user(&dbg, argp, sizeof dbg)) | 1523 | if (copy_from_user(&dbg, argp, sizeof dbg)) |
1515 | goto out; | 1524 | goto out; |
1516 | r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); | 1525 | r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); |
1517 | if (r) | 1526 | if (r) |
1518 | goto out; | 1527 | goto out; |
1519 | r = 0; | 1528 | r = 0; |
1520 | break; | 1529 | break; |
1521 | } | 1530 | } |
1522 | case KVM_SET_SIGNAL_MASK: { | 1531 | case KVM_SET_SIGNAL_MASK: { |
1523 | struct kvm_signal_mask __user *sigmask_arg = argp; | 1532 | struct kvm_signal_mask __user *sigmask_arg = argp; |
1524 | struct kvm_signal_mask kvm_sigmask; | 1533 | struct kvm_signal_mask kvm_sigmask; |
1525 | sigset_t sigset, *p; | 1534 | sigset_t sigset, *p; |
1526 | 1535 | ||
1527 | p = NULL; | 1536 | p = NULL; |
1528 | if (argp) { | 1537 | if (argp) { |
1529 | r = -EFAULT; | 1538 | r = -EFAULT; |
1530 | if (copy_from_user(&kvm_sigmask, argp, | 1539 | if (copy_from_user(&kvm_sigmask, argp, |
1531 | sizeof kvm_sigmask)) | 1540 | sizeof kvm_sigmask)) |
1532 | goto out; | 1541 | goto out; |
1533 | r = -EINVAL; | 1542 | r = -EINVAL; |
1534 | if (kvm_sigmask.len != sizeof sigset) | 1543 | if (kvm_sigmask.len != sizeof sigset) |
1535 | goto out; | 1544 | goto out; |
1536 | r = -EFAULT; | 1545 | r = -EFAULT; |
1537 | if (copy_from_user(&sigset, sigmask_arg->sigset, | 1546 | if (copy_from_user(&sigset, sigmask_arg->sigset, |
1538 | sizeof sigset)) | 1547 | sizeof sigset)) |
1539 | goto out; | 1548 | goto out; |
1540 | p = &sigset; | 1549 | p = &sigset; |
1541 | } | 1550 | } |
1542 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, p); | 1551 | r = kvm_vcpu_ioctl_set_sigmask(vcpu, p); |
1543 | break; | 1552 | break; |
1544 | } | 1553 | } |
1545 | case KVM_GET_FPU: { | 1554 | case KVM_GET_FPU: { |
1546 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); | 1555 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); |
1547 | r = -ENOMEM; | 1556 | r = -ENOMEM; |
1548 | if (!fpu) | 1557 | if (!fpu) |
1549 | goto out; | 1558 | goto out; |
1550 | r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); | 1559 | r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); |
1551 | if (r) | 1560 | if (r) |
1552 | goto out; | 1561 | goto out; |
1553 | r = -EFAULT; | 1562 | r = -EFAULT; |
1554 | if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) | 1563 | if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) |
1555 | goto out; | 1564 | goto out; |
1556 | r = 0; | 1565 | r = 0; |
1557 | break; | 1566 | break; |
1558 | } | 1567 | } |
1559 | case KVM_SET_FPU: { | 1568 | case KVM_SET_FPU: { |
1560 | fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); | 1569 | fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); |
1561 | r = -ENOMEM; | 1570 | r = -ENOMEM; |
1562 | if (!fpu) | 1571 | if (!fpu) |
1563 | goto out; | 1572 | goto out; |
1564 | r = -EFAULT; | 1573 | r = -EFAULT; |
1565 | if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) | 1574 | if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) |
1566 | goto out; | 1575 | goto out; |
1567 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); | 1576 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); |
1568 | if (r) | 1577 | if (r) |
1569 | goto out; | 1578 | goto out; |
1570 | r = 0; | 1579 | r = 0; |
1571 | break; | 1580 | break; |
1572 | } | 1581 | } |
1573 | default: | 1582 | default: |
1574 | r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); | 1583 | r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); |
1575 | } | 1584 | } |
1576 | out: | 1585 | out: |
1577 | vcpu_put(vcpu); | 1586 | vcpu_put(vcpu); |
1578 | kfree(fpu); | 1587 | kfree(fpu); |
1579 | kfree(kvm_sregs); | 1588 | kfree(kvm_sregs); |
1580 | return r; | 1589 | return r; |
1581 | } | 1590 | } |
1582 | 1591 | ||
1583 | static long kvm_vm_ioctl(struct file *filp, | 1592 | static long kvm_vm_ioctl(struct file *filp, |
1584 | unsigned int ioctl, unsigned long arg) | 1593 | unsigned int ioctl, unsigned long arg) |
1585 | { | 1594 | { |
1586 | struct kvm *kvm = filp->private_data; | 1595 | struct kvm *kvm = filp->private_data; |
1587 | void __user *argp = (void __user *)arg; | 1596 | void __user *argp = (void __user *)arg; |
1588 | int r; | 1597 | int r; |
1589 | 1598 | ||
1590 | if (kvm->mm != current->mm) | 1599 | if (kvm->mm != current->mm) |
1591 | return -EIO; | 1600 | return -EIO; |
1592 | switch (ioctl) { | 1601 | switch (ioctl) { |
1593 | case KVM_CREATE_VCPU: | 1602 | case KVM_CREATE_VCPU: |
1594 | r = kvm_vm_ioctl_create_vcpu(kvm, arg); | 1603 | r = kvm_vm_ioctl_create_vcpu(kvm, arg); |
1595 | if (r < 0) | 1604 | if (r < 0) |
1596 | goto out; | 1605 | goto out; |
1597 | break; | 1606 | break; |
1598 | case KVM_SET_USER_MEMORY_REGION: { | 1607 | case KVM_SET_USER_MEMORY_REGION: { |
1599 | struct kvm_userspace_memory_region kvm_userspace_mem; | 1608 | struct kvm_userspace_memory_region kvm_userspace_mem; |
1600 | 1609 | ||
1601 | r = -EFAULT; | 1610 | r = -EFAULT; |
1602 | if (copy_from_user(&kvm_userspace_mem, argp, | 1611 | if (copy_from_user(&kvm_userspace_mem, argp, |
1603 | sizeof kvm_userspace_mem)) | 1612 | sizeof kvm_userspace_mem)) |
1604 | goto out; | 1613 | goto out; |
1605 | 1614 | ||
1606 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); | 1615 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); |
1607 | if (r) | 1616 | if (r) |
1608 | goto out; | 1617 | goto out; |
1609 | break; | 1618 | break; |
1610 | } | 1619 | } |
1611 | case KVM_GET_DIRTY_LOG: { | 1620 | case KVM_GET_DIRTY_LOG: { |
1612 | struct kvm_dirty_log log; | 1621 | struct kvm_dirty_log log; |
1613 | 1622 | ||
1614 | r = -EFAULT; | 1623 | r = -EFAULT; |
1615 | if (copy_from_user(&log, argp, sizeof log)) | 1624 | if (copy_from_user(&log, argp, sizeof log)) |
1616 | goto out; | 1625 | goto out; |
1617 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | 1626 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); |
1618 | if (r) | 1627 | if (r) |
1619 | goto out; | 1628 | goto out; |
1620 | break; | 1629 | break; |
1621 | } | 1630 | } |
1622 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 1631 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
1623 | case KVM_REGISTER_COALESCED_MMIO: { | 1632 | case KVM_REGISTER_COALESCED_MMIO: { |
1624 | struct kvm_coalesced_mmio_zone zone; | 1633 | struct kvm_coalesced_mmio_zone zone; |
1625 | r = -EFAULT; | 1634 | r = -EFAULT; |
1626 | if (copy_from_user(&zone, argp, sizeof zone)) | 1635 | if (copy_from_user(&zone, argp, sizeof zone)) |
1627 | goto out; | 1636 | goto out; |
1628 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); | 1637 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); |
1629 | if (r) | 1638 | if (r) |
1630 | goto out; | 1639 | goto out; |
1631 | r = 0; | 1640 | r = 0; |
1632 | break; | 1641 | break; |
1633 | } | 1642 | } |
1634 | case KVM_UNREGISTER_COALESCED_MMIO: { | 1643 | case KVM_UNREGISTER_COALESCED_MMIO: { |
1635 | struct kvm_coalesced_mmio_zone zone; | 1644 | struct kvm_coalesced_mmio_zone zone; |
1636 | r = -EFAULT; | 1645 | r = -EFAULT; |
1637 | if (copy_from_user(&zone, argp, sizeof zone)) | 1646 | if (copy_from_user(&zone, argp, sizeof zone)) |
1638 | goto out; | 1647 | goto out; |
1639 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); | 1648 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); |
1640 | if (r) | 1649 | if (r) |
1641 | goto out; | 1650 | goto out; |
1642 | r = 0; | 1651 | r = 0; |
1643 | break; | 1652 | break; |
1644 | } | 1653 | } |
1645 | #endif | 1654 | #endif |
1646 | case KVM_IRQFD: { | 1655 | case KVM_IRQFD: { |
1647 | struct kvm_irqfd data; | 1656 | struct kvm_irqfd data; |
1648 | 1657 | ||
1649 | r = -EFAULT; | 1658 | r = -EFAULT; |
1650 | if (copy_from_user(&data, argp, sizeof data)) | 1659 | if (copy_from_user(&data, argp, sizeof data)) |
1651 | goto out; | 1660 | goto out; |
1652 | r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); | 1661 | r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); |
1653 | break; | 1662 | break; |
1654 | } | 1663 | } |
1655 | case KVM_IOEVENTFD: { | 1664 | case KVM_IOEVENTFD: { |
1656 | struct kvm_ioeventfd data; | 1665 | struct kvm_ioeventfd data; |
1657 | 1666 | ||
1658 | r = -EFAULT; | 1667 | r = -EFAULT; |
1659 | if (copy_from_user(&data, argp, sizeof data)) | 1668 | if (copy_from_user(&data, argp, sizeof data)) |
1660 | goto out; | 1669 | goto out; |
1661 | r = kvm_ioeventfd(kvm, &data); | 1670 | r = kvm_ioeventfd(kvm, &data); |
1662 | break; | 1671 | break; |
1663 | } | 1672 | } |
1664 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 1673 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
1665 | case KVM_SET_BOOT_CPU_ID: | 1674 | case KVM_SET_BOOT_CPU_ID: |
1666 | r = 0; | 1675 | r = 0; |
1667 | mutex_lock(&kvm->lock); | 1676 | mutex_lock(&kvm->lock); |
1668 | if (atomic_read(&kvm->online_vcpus) != 0) | 1677 | if (atomic_read(&kvm->online_vcpus) != 0) |
1669 | r = -EBUSY; | 1678 | r = -EBUSY; |
1670 | else | 1679 | else |
1671 | kvm->bsp_vcpu_id = arg; | 1680 | kvm->bsp_vcpu_id = arg; |
1672 | mutex_unlock(&kvm->lock); | 1681 | mutex_unlock(&kvm->lock); |
1673 | break; | 1682 | break; |
1674 | #endif | 1683 | #endif |
1675 | default: | 1684 | default: |
1676 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1685 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
1677 | if (r == -ENOTTY) | 1686 | if (r == -ENOTTY) |
1678 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | 1687 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); |
1679 | } | 1688 | } |
1680 | out: | 1689 | out: |
1681 | return r; | 1690 | return r; |
1682 | } | 1691 | } |
1683 | 1692 | ||
1684 | #ifdef CONFIG_COMPAT | 1693 | #ifdef CONFIG_COMPAT |
1685 | struct compat_kvm_dirty_log { | 1694 | struct compat_kvm_dirty_log { |
1686 | __u32 slot; | 1695 | __u32 slot; |
1687 | __u32 padding1; | 1696 | __u32 padding1; |
1688 | union { | 1697 | union { |
1689 | compat_uptr_t dirty_bitmap; /* one bit per page */ | 1698 | compat_uptr_t dirty_bitmap; /* one bit per page */ |
1690 | __u64 padding2; | 1699 | __u64 padding2; |
1691 | }; | 1700 | }; |
1692 | }; | 1701 | }; |
1693 | 1702 | ||
1694 | static long kvm_vm_compat_ioctl(struct file *filp, | 1703 | static long kvm_vm_compat_ioctl(struct file *filp, |
1695 | unsigned int ioctl, unsigned long arg) | 1704 | unsigned int ioctl, unsigned long arg) |
1696 | { | 1705 | { |
1697 | struct kvm *kvm = filp->private_data; | 1706 | struct kvm *kvm = filp->private_data; |
1698 | int r; | 1707 | int r; |
1699 | 1708 | ||
1700 | if (kvm->mm != current->mm) | 1709 | if (kvm->mm != current->mm) |
1701 | return -EIO; | 1710 | return -EIO; |
1702 | switch (ioctl) { | 1711 | switch (ioctl) { |
1703 | case KVM_GET_DIRTY_LOG: { | 1712 | case KVM_GET_DIRTY_LOG: { |
1704 | struct compat_kvm_dirty_log compat_log; | 1713 | struct compat_kvm_dirty_log compat_log; |
1705 | struct kvm_dirty_log log; | 1714 | struct kvm_dirty_log log; |
1706 | 1715 | ||
1707 | r = -EFAULT; | 1716 | r = -EFAULT; |
1708 | if (copy_from_user(&compat_log, (void __user *)arg, | 1717 | if (copy_from_user(&compat_log, (void __user *)arg, |
1709 | sizeof(compat_log))) | 1718 | sizeof(compat_log))) |
1710 | goto out; | 1719 | goto out; |
1711 | log.slot = compat_log.slot; | 1720 | log.slot = compat_log.slot; |
1712 | log.padding1 = compat_log.padding1; | 1721 | log.padding1 = compat_log.padding1; |
1713 | log.padding2 = compat_log.padding2; | 1722 | log.padding2 = compat_log.padding2; |
1714 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | 1723 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); |
1715 | 1724 | ||
1716 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | 1725 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); |
1717 | if (r) | 1726 | if (r) |
1718 | goto out; | 1727 | goto out; |
1719 | break; | 1728 | break; |
1720 | } | 1729 | } |
1721 | default: | 1730 | default: |
1722 | r = kvm_vm_ioctl(filp, ioctl, arg); | 1731 | r = kvm_vm_ioctl(filp, ioctl, arg); |
1723 | } | 1732 | } |
1724 | 1733 | ||
1725 | out: | 1734 | out: |
1726 | return r; | 1735 | return r; |
1727 | } | 1736 | } |
1728 | #endif | 1737 | #endif |
1729 | 1738 | ||
1730 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1739 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1731 | { | 1740 | { |
1732 | struct page *page[1]; | 1741 | struct page *page[1]; |
1733 | unsigned long addr; | 1742 | unsigned long addr; |
1734 | int npages; | 1743 | int npages; |
1735 | gfn_t gfn = vmf->pgoff; | 1744 | gfn_t gfn = vmf->pgoff; |
1736 | struct kvm *kvm = vma->vm_file->private_data; | 1745 | struct kvm *kvm = vma->vm_file->private_data; |
1737 | 1746 | ||
1738 | addr = gfn_to_hva(kvm, gfn); | 1747 | addr = gfn_to_hva(kvm, gfn); |
1739 | if (kvm_is_error_hva(addr)) | 1748 | if (kvm_is_error_hva(addr)) |
1740 | return VM_FAULT_SIGBUS; | 1749 | return VM_FAULT_SIGBUS; |
1741 | 1750 | ||
1742 | npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, | 1751 | npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, |
1743 | NULL); | 1752 | NULL); |
1744 | if (unlikely(npages != 1)) | 1753 | if (unlikely(npages != 1)) |
1745 | return VM_FAULT_SIGBUS; | 1754 | return VM_FAULT_SIGBUS; |
1746 | 1755 | ||
1747 | vmf->page = page[0]; | 1756 | vmf->page = page[0]; |
1748 | return 0; | 1757 | return 0; |
1749 | } | 1758 | } |
1750 | 1759 | ||
1751 | static const struct vm_operations_struct kvm_vm_vm_ops = { | 1760 | static const struct vm_operations_struct kvm_vm_vm_ops = { |
1752 | .fault = kvm_vm_fault, | 1761 | .fault = kvm_vm_fault, |
1753 | }; | 1762 | }; |
1754 | 1763 | ||
1755 | static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | 1764 | static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) |
1756 | { | 1765 | { |
1757 | vma->vm_ops = &kvm_vm_vm_ops; | 1766 | vma->vm_ops = &kvm_vm_vm_ops; |
1758 | return 0; | 1767 | return 0; |
1759 | } | 1768 | } |
1760 | 1769 | ||
1761 | static struct file_operations kvm_vm_fops = { | 1770 | static struct file_operations kvm_vm_fops = { |
1762 | .release = kvm_vm_release, | 1771 | .release = kvm_vm_release, |
1763 | .unlocked_ioctl = kvm_vm_ioctl, | 1772 | .unlocked_ioctl = kvm_vm_ioctl, |
1764 | #ifdef CONFIG_COMPAT | 1773 | #ifdef CONFIG_COMPAT |
1765 | .compat_ioctl = kvm_vm_compat_ioctl, | 1774 | .compat_ioctl = kvm_vm_compat_ioctl, |
1766 | #endif | 1775 | #endif |
1767 | .mmap = kvm_vm_mmap, | 1776 | .mmap = kvm_vm_mmap, |
1768 | }; | 1777 | }; |
1769 | 1778 | ||
1770 | static int kvm_dev_ioctl_create_vm(void) | 1779 | static int kvm_dev_ioctl_create_vm(void) |
1771 | { | 1780 | { |
1772 | int fd, r; | 1781 | int fd, r; |
1773 | struct kvm *kvm; | 1782 | struct kvm *kvm; |
1774 | 1783 | ||
1775 | kvm = kvm_create_vm(); | 1784 | kvm = kvm_create_vm(); |
1776 | if (IS_ERR(kvm)) | 1785 | if (IS_ERR(kvm)) |
1777 | return PTR_ERR(kvm); | 1786 | return PTR_ERR(kvm); |
1778 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 1787 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
1779 | r = kvm_coalesced_mmio_init(kvm); | 1788 | r = kvm_coalesced_mmio_init(kvm); |
1780 | if (r < 0) { | 1789 | if (r < 0) { |
1781 | kvm_put_kvm(kvm); | 1790 | kvm_put_kvm(kvm); |
1782 | return r; | 1791 | return r; |
1783 | } | 1792 | } |
1784 | #endif | 1793 | #endif |
1785 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | 1794 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
1786 | if (fd < 0) | 1795 | if (fd < 0) |
1787 | kvm_put_kvm(kvm); | 1796 | kvm_put_kvm(kvm); |
1788 | 1797 | ||
1789 | return fd; | 1798 | return fd; |
1790 | } | 1799 | } |
1791 | 1800 | ||
1792 | static long kvm_dev_ioctl_check_extension_generic(long arg) | 1801 | static long kvm_dev_ioctl_check_extension_generic(long arg) |
1793 | { | 1802 | { |
1794 | switch (arg) { | 1803 | switch (arg) { |
1795 | case KVM_CAP_USER_MEMORY: | 1804 | case KVM_CAP_USER_MEMORY: |
1796 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: | 1805 | case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: |
1797 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: | 1806 | case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: |
1798 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 1807 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
1799 | case KVM_CAP_SET_BOOT_CPU_ID: | 1808 | case KVM_CAP_SET_BOOT_CPU_ID: |
1800 | #endif | 1809 | #endif |
1801 | case KVM_CAP_INTERNAL_ERROR_DATA: | 1810 | case KVM_CAP_INTERNAL_ERROR_DATA: |
1802 | return 1; | 1811 | return 1; |
1803 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 1812 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
1804 | case KVM_CAP_IRQ_ROUTING: | 1813 | case KVM_CAP_IRQ_ROUTING: |
1805 | return KVM_MAX_IRQ_ROUTES; | 1814 | return KVM_MAX_IRQ_ROUTES; |
1806 | #endif | 1815 | #endif |
1807 | default: | 1816 | default: |
1808 | break; | 1817 | break; |
1809 | } | 1818 | } |
1810 | return kvm_dev_ioctl_check_extension(arg); | 1819 | return kvm_dev_ioctl_check_extension(arg); |
1811 | } | 1820 | } |
1812 | 1821 | ||
1813 | static long kvm_dev_ioctl(struct file *filp, | 1822 | static long kvm_dev_ioctl(struct file *filp, |
1814 | unsigned int ioctl, unsigned long arg) | 1823 | unsigned int ioctl, unsigned long arg) |
1815 | { | 1824 | { |
1816 | long r = -EINVAL; | 1825 | long r = -EINVAL; |
1817 | 1826 | ||
1818 | switch (ioctl) { | 1827 | switch (ioctl) { |
1819 | case KVM_GET_API_VERSION: | 1828 | case KVM_GET_API_VERSION: |
1820 | r = -EINVAL; | 1829 | r = -EINVAL; |
1821 | if (arg) | 1830 | if (arg) |
1822 | goto out; | 1831 | goto out; |
1823 | r = KVM_API_VERSION; | 1832 | r = KVM_API_VERSION; |
1824 | break; | 1833 | break; |
1825 | case KVM_CREATE_VM: | 1834 | case KVM_CREATE_VM: |
1826 | r = -EINVAL; | 1835 | r = -EINVAL; |
1827 | if (arg) | 1836 | if (arg) |
1828 | goto out; | 1837 | goto out; |
1829 | r = kvm_dev_ioctl_create_vm(); | 1838 | r = kvm_dev_ioctl_create_vm(); |
1830 | break; | 1839 | break; |
1831 | case KVM_CHECK_EXTENSION: | 1840 | case KVM_CHECK_EXTENSION: |
1832 | r = kvm_dev_ioctl_check_extension_generic(arg); | 1841 | r = kvm_dev_ioctl_check_extension_generic(arg); |
1833 | break; | 1842 | break; |
1834 | case KVM_GET_VCPU_MMAP_SIZE: | 1843 | case KVM_GET_VCPU_MMAP_SIZE: |
1835 | r = -EINVAL; | 1844 | r = -EINVAL; |
1836 | if (arg) | 1845 | if (arg) |
1837 | goto out; | 1846 | goto out; |
1838 | r = PAGE_SIZE; /* struct kvm_run */ | 1847 | r = PAGE_SIZE; /* struct kvm_run */ |
1839 | #ifdef CONFIG_X86 | 1848 | #ifdef CONFIG_X86 |
1840 | r += PAGE_SIZE; /* pio data page */ | 1849 | r += PAGE_SIZE; /* pio data page */ |
1841 | #endif | 1850 | #endif |
1842 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 1851 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
1843 | r += PAGE_SIZE; /* coalesced mmio ring page */ | 1852 | r += PAGE_SIZE; /* coalesced mmio ring page */ |
1844 | #endif | 1853 | #endif |
1845 | break; | 1854 | break; |
1846 | case KVM_TRACE_ENABLE: | 1855 | case KVM_TRACE_ENABLE: |
1847 | case KVM_TRACE_PAUSE: | 1856 | case KVM_TRACE_PAUSE: |
1848 | case KVM_TRACE_DISABLE: | 1857 | case KVM_TRACE_DISABLE: |
1849 | r = -EOPNOTSUPP; | 1858 | r = -EOPNOTSUPP; |
1850 | break; | 1859 | break; |
1851 | default: | 1860 | default: |
1852 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 1861 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
1853 | } | 1862 | } |
1854 | out: | 1863 | out: |
1855 | return r; | 1864 | return r; |
1856 | } | 1865 | } |
1857 | 1866 | ||
1858 | static struct file_operations kvm_chardev_ops = { | 1867 | static struct file_operations kvm_chardev_ops = { |
1859 | .unlocked_ioctl = kvm_dev_ioctl, | 1868 | .unlocked_ioctl = kvm_dev_ioctl, |
1860 | .compat_ioctl = kvm_dev_ioctl, | 1869 | .compat_ioctl = kvm_dev_ioctl, |
1861 | }; | 1870 | }; |
1862 | 1871 | ||
1863 | static struct miscdevice kvm_dev = { | 1872 | static struct miscdevice kvm_dev = { |
1864 | KVM_MINOR, | 1873 | KVM_MINOR, |
1865 | "kvm", | 1874 | "kvm", |
1866 | &kvm_chardev_ops, | 1875 | &kvm_chardev_ops, |
1867 | }; | 1876 | }; |
1868 | 1877 | ||
1869 | static void hardware_enable(void *junk) | 1878 | static void hardware_enable(void *junk) |
1870 | { | 1879 | { |
1871 | int cpu = raw_smp_processor_id(); | 1880 | int cpu = raw_smp_processor_id(); |
1872 | int r; | 1881 | int r; |
1873 | 1882 | ||
1874 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1883 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
1875 | return; | 1884 | return; |
1876 | 1885 | ||
1877 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 1886 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
1878 | 1887 | ||
1879 | r = kvm_arch_hardware_enable(NULL); | 1888 | r = kvm_arch_hardware_enable(NULL); |
1880 | 1889 | ||
1881 | if (r) { | 1890 | if (r) { |
1882 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | 1891 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); |
1883 | atomic_inc(&hardware_enable_failed); | 1892 | atomic_inc(&hardware_enable_failed); |
1884 | printk(KERN_INFO "kvm: enabling virtualization on " | 1893 | printk(KERN_INFO "kvm: enabling virtualization on " |
1885 | "CPU%d failed\n", cpu); | 1894 | "CPU%d failed\n", cpu); |
1886 | } | 1895 | } |
1887 | } | 1896 | } |
1888 | 1897 | ||
1889 | static void hardware_disable(void *junk) | 1898 | static void hardware_disable(void *junk) |
1890 | { | 1899 | { |
1891 | int cpu = raw_smp_processor_id(); | 1900 | int cpu = raw_smp_processor_id(); |
1892 | 1901 | ||
1893 | if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1902 | if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
1894 | return; | 1903 | return; |
1895 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | 1904 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); |
1896 | kvm_arch_hardware_disable(NULL); | 1905 | kvm_arch_hardware_disable(NULL); |
1897 | } | 1906 | } |
1898 | 1907 | ||
1899 | static void hardware_disable_all_nolock(void) | 1908 | static void hardware_disable_all_nolock(void) |
1900 | { | 1909 | { |
1901 | BUG_ON(!kvm_usage_count); | 1910 | BUG_ON(!kvm_usage_count); |
1902 | 1911 | ||
1903 | kvm_usage_count--; | 1912 | kvm_usage_count--; |
1904 | if (!kvm_usage_count) | 1913 | if (!kvm_usage_count) |
1905 | on_each_cpu(hardware_disable, NULL, 1); | 1914 | on_each_cpu(hardware_disable, NULL, 1); |
1906 | } | 1915 | } |
1907 | 1916 | ||
1908 | static void hardware_disable_all(void) | 1917 | static void hardware_disable_all(void) |
1909 | { | 1918 | { |
1910 | spin_lock(&kvm_lock); | 1919 | spin_lock(&kvm_lock); |
1911 | hardware_disable_all_nolock(); | 1920 | hardware_disable_all_nolock(); |
1912 | spin_unlock(&kvm_lock); | 1921 | spin_unlock(&kvm_lock); |
1913 | } | 1922 | } |
1914 | 1923 | ||
1915 | static int hardware_enable_all(void) | 1924 | static int hardware_enable_all(void) |
1916 | { | 1925 | { |
1917 | int r = 0; | 1926 | int r = 0; |
1918 | 1927 | ||
1919 | spin_lock(&kvm_lock); | 1928 | spin_lock(&kvm_lock); |
1920 | 1929 | ||
1921 | kvm_usage_count++; | 1930 | kvm_usage_count++; |
1922 | if (kvm_usage_count == 1) { | 1931 | if (kvm_usage_count == 1) { |
1923 | atomic_set(&hardware_enable_failed, 0); | 1932 | atomic_set(&hardware_enable_failed, 0); |
1924 | on_each_cpu(hardware_enable, NULL, 1); | 1933 | on_each_cpu(hardware_enable, NULL, 1); |
1925 | 1934 | ||
1926 | if (atomic_read(&hardware_enable_failed)) { | 1935 | if (atomic_read(&hardware_enable_failed)) { |
1927 | hardware_disable_all_nolock(); | 1936 | hardware_disable_all_nolock(); |
1928 | r = -EBUSY; | 1937 | r = -EBUSY; |
1929 | } | 1938 | } |
1930 | } | 1939 | } |
1931 | 1940 | ||
1932 | spin_unlock(&kvm_lock); | 1941 | spin_unlock(&kvm_lock); |
1933 | 1942 | ||
1934 | return r; | 1943 | return r; |
1935 | } | 1944 | } |
1936 | 1945 | ||
1937 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 1946 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
1938 | void *v) | 1947 | void *v) |
1939 | { | 1948 | { |
1940 | int cpu = (long)v; | 1949 | int cpu = (long)v; |
1941 | 1950 | ||
1942 | if (!kvm_usage_count) | 1951 | if (!kvm_usage_count) |
1943 | return NOTIFY_OK; | 1952 | return NOTIFY_OK; |
1944 | 1953 | ||
1945 | val &= ~CPU_TASKS_FROZEN; | 1954 | val &= ~CPU_TASKS_FROZEN; |
1946 | switch (val) { | 1955 | switch (val) { |
1947 | case CPU_DYING: | 1956 | case CPU_DYING: |
1948 | printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", | 1957 | printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", |
1949 | cpu); | 1958 | cpu); |
1950 | hardware_disable(NULL); | 1959 | hardware_disable(NULL); |
1951 | break; | 1960 | break; |
1952 | case CPU_ONLINE: | 1961 | case CPU_ONLINE: |
1953 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", | 1962 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", |
1954 | cpu); | 1963 | cpu); |
1955 | smp_call_function_single(cpu, hardware_enable, NULL, 1); | 1964 | smp_call_function_single(cpu, hardware_enable, NULL, 1); |
1956 | break; | 1965 | break; |
1957 | } | 1966 | } |
1958 | return NOTIFY_OK; | 1967 | return NOTIFY_OK; |
1959 | } | 1968 | } |
1960 | 1969 | ||
1961 | 1970 | ||
1962 | asmlinkage void kvm_handle_fault_on_reboot(void) | 1971 | asmlinkage void kvm_handle_fault_on_reboot(void) |
1963 | { | 1972 | { |
1964 | if (kvm_rebooting) | 1973 | if (kvm_rebooting) |
1965 | /* spin while reset goes on */ | 1974 | /* spin while reset goes on */ |
1966 | while (true) | 1975 | while (true) |
1967 | ; | 1976 | ; |
1968 | /* Fault while not rebooting. We want the trace. */ | 1977 | /* Fault while not rebooting. We want the trace. */ |
1969 | BUG(); | 1978 | BUG(); |
1970 | } | 1979 | } |
1971 | EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); | 1980 | EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); |
1972 | 1981 | ||
1973 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | 1982 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, |
1974 | void *v) | 1983 | void *v) |
1975 | { | 1984 | { |
1976 | /* | 1985 | /* |
1977 | * Some (well, at least mine) BIOSes hang on reboot if | 1986 | * Some (well, at least mine) BIOSes hang on reboot if |
1978 | * in vmx root mode. | 1987 | * in vmx root mode. |
1979 | * | 1988 | * |
1980 | * And Intel TXT requires VMX to be off on all CPUs when the system shuts down. | 1989 | * And Intel TXT requires VMX to be off on all CPUs when the system shuts down. |
1981 | */ | 1990 | */ |
1982 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | 1991 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); |
1983 | kvm_rebooting = true; | 1992 | kvm_rebooting = true; |
1984 | on_each_cpu(hardware_disable, NULL, 1); | 1993 | on_each_cpu(hardware_disable, NULL, 1); |
1985 | return NOTIFY_OK; | 1994 | return NOTIFY_OK; |
1986 | } | 1995 | } |
1987 | 1996 | ||
1988 | static struct notifier_block kvm_reboot_notifier = { | 1997 | static struct notifier_block kvm_reboot_notifier = { |
1989 | .notifier_call = kvm_reboot, | 1998 | .notifier_call = kvm_reboot, |
1990 | .priority = 0, | 1999 | .priority = 0, |
1991 | }; | 2000 | }; |
1992 | 2001 | ||
1993 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus) | 2002 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus) |
1994 | { | 2003 | { |
1995 | int i; | 2004 | int i; |
1996 | 2005 | ||
1997 | for (i = 0; i < bus->dev_count; i++) { | 2006 | for (i = 0; i < bus->dev_count; i++) { |
1998 | struct kvm_io_device *pos = bus->devs[i]; | 2007 | struct kvm_io_device *pos = bus->devs[i]; |
1999 | 2008 | ||
2000 | kvm_iodevice_destructor(pos); | 2009 | kvm_iodevice_destructor(pos); |
2001 | } | 2010 | } |
2002 | kfree(bus); | 2011 | kfree(bus); |
2003 | } | 2012 | } |
2004 | 2013 | ||
2005 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 2014 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
2006 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2015 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2007 | int len, const void *val) | 2016 | int len, const void *val) |
2008 | { | 2017 | { |
2009 | int i; | 2018 | int i; |
2010 | struct kvm_io_bus *bus; | 2019 | struct kvm_io_bus *bus; |
2011 | 2020 | ||
2012 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2021 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2013 | for (i = 0; i < bus->dev_count; i++) | 2022 | for (i = 0; i < bus->dev_count; i++) |
2014 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 2023 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) |
2015 | return 0; | 2024 | return 0; |
2016 | return -EOPNOTSUPP; | 2025 | return -EOPNOTSUPP; |
2017 | } | 2026 | } |
2018 | 2027 | ||
2019 | /* kvm_io_bus_read - called under kvm->slots_lock */ | 2028 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
2020 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 2029 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
2021 | int len, void *val) | 2030 | int len, void *val) |
2022 | { | 2031 | { |
2023 | int i; | 2032 | int i; |
2024 | struct kvm_io_bus *bus; | 2033 | struct kvm_io_bus *bus; |
2025 | 2034 | ||
2026 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | 2035 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); |
2027 | for (i = 0; i < bus->dev_count; i++) | 2036 | for (i = 0; i < bus->dev_count; i++) |
2028 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2037 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) |
2029 | return 0; | 2038 | return 0; |
2030 | return -EOPNOTSUPP; | 2039 | return -EOPNOTSUPP; |
2031 | } | 2040 | } |
2032 | 2041 | ||
2033 | /* Caller must hold slots_lock. */ | 2042 | /* Caller must hold slots_lock. */ |
2034 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 2043 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
2035 | struct kvm_io_device *dev) | 2044 | struct kvm_io_device *dev) |
2036 | { | 2045 | { |
2037 | struct kvm_io_bus *new_bus, *bus; | 2046 | struct kvm_io_bus *new_bus, *bus; |
2038 | 2047 | ||
2039 | bus = kvm->buses[bus_idx]; | 2048 | bus = kvm->buses[bus_idx]; |
2040 | if (bus->dev_count > NR_IOBUS_DEVS-1) | 2049 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
2041 | return -ENOSPC; | 2050 | return -ENOSPC; |
2042 | 2051 | ||
2043 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); | 2052 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
2044 | if (!new_bus) | 2053 | if (!new_bus) |
2045 | return -ENOMEM; | 2054 | return -ENOMEM; |
2046 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | 2055 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
2047 | new_bus->devs[new_bus->dev_count++] = dev; | 2056 | new_bus->devs[new_bus->dev_count++] = dev; |
2048 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2057 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
2049 | synchronize_srcu_expedited(&kvm->srcu); | 2058 | synchronize_srcu_expedited(&kvm->srcu); |
2050 | kfree(bus); | 2059 | kfree(bus); |
2051 | 2060 | ||
2052 | return 0; | 2061 | return 0; |
2053 | } | 2062 | } |
2054 | 2063 | ||
2055 | /* Caller must hold slots_lock. */ | 2064 | /* Caller must hold slots_lock. */ |
2056 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, | 2065 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
2057 | struct kvm_io_device *dev) | 2066 | struct kvm_io_device *dev) |
2058 | { | 2067 | { |
2059 | int i, r; | 2068 | int i, r; |
2060 | struct kvm_io_bus *new_bus, *bus; | 2069 | struct kvm_io_bus *new_bus, *bus; |
2061 | 2070 | ||
2062 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); | 2071 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
2063 | if (!new_bus) | 2072 | if (!new_bus) |
2064 | return -ENOMEM; | 2073 | return -ENOMEM; |
2065 | 2074 | ||
2066 | bus = kvm->buses[bus_idx]; | 2075 | bus = kvm->buses[bus_idx]; |
2067 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | 2076 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
2068 | 2077 | ||
2069 | r = -ENOENT; | 2078 | r = -ENOENT; |
2070 | for (i = 0; i < new_bus->dev_count; i++) | 2079 | for (i = 0; i < new_bus->dev_count; i++) |
2071 | if (new_bus->devs[i] == dev) { | 2080 | if (new_bus->devs[i] == dev) { |
2072 | r = 0; | 2081 | r = 0; |
2073 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | 2082 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; |
2074 | break; | 2083 | break; |
2075 | } | 2084 | } |
2076 | 2085 | ||
2077 | if (r) { | 2086 | if (r) { |
2078 | kfree(new_bus); | 2087 | kfree(new_bus); |
2079 | return r; | 2088 | return r; |
2080 | } | 2089 | } |
2081 | 2090 | ||
2082 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 2091 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
2083 | synchronize_srcu_expedited(&kvm->srcu); | 2092 | synchronize_srcu_expedited(&kvm->srcu); |
2084 | kfree(bus); | 2093 | kfree(bus); |
2085 | return r; | 2094 | return r; |
2086 | } | 2095 | } |
2087 | 2096 | ||
2088 | static struct notifier_block kvm_cpu_notifier = { | 2097 | static struct notifier_block kvm_cpu_notifier = { |
2089 | .notifier_call = kvm_cpu_hotplug, | 2098 | .notifier_call = kvm_cpu_hotplug, |
2090 | .priority = 20, /* must be > scheduler priority */ | 2099 | .priority = 20, /* must be > scheduler priority */ |
2091 | }; | 2100 | }; |
2092 | 2101 | ||
2093 | static int vm_stat_get(void *_offset, u64 *val) | 2102 | static int vm_stat_get(void *_offset, u64 *val) |
2094 | { | 2103 | { |
2095 | unsigned offset = (long)_offset; | 2104 | unsigned offset = (long)_offset; |
2096 | struct kvm *kvm; | 2105 | struct kvm *kvm; |
2097 | 2106 | ||
2098 | *val = 0; | 2107 | *val = 0; |
2099 | spin_lock(&kvm_lock); | 2108 | spin_lock(&kvm_lock); |
2100 | list_for_each_entry(kvm, &vm_list, vm_list) | 2109 | list_for_each_entry(kvm, &vm_list, vm_list) |
2101 | *val += *(u32 *)((void *)kvm + offset); | 2110 | *val += *(u32 *)((void *)kvm + offset); |
2102 | spin_unlock(&kvm_lock); | 2111 | spin_unlock(&kvm_lock); |
2103 | return 0; | 2112 | return 0; |
2104 | } | 2113 | } |
2105 | 2114 | ||
2106 | DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); | 2115 | DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); |
2107 | 2116 | ||
2108 | static int vcpu_stat_get(void *_offset, u64 *val) | 2117 | static int vcpu_stat_get(void *_offset, u64 *val) |
2109 | { | 2118 | { |
2110 | unsigned offset = (long)_offset; | 2119 | unsigned offset = (long)_offset; |
2111 | struct kvm *kvm; | 2120 | struct kvm *kvm; |
2112 | struct kvm_vcpu *vcpu; | 2121 | struct kvm_vcpu *vcpu; |
2113 | int i; | 2122 | int i; |
2114 | 2123 | ||
2115 | *val = 0; | 2124 | *val = 0; |
2116 | spin_lock(&kvm_lock); | 2125 | spin_lock(&kvm_lock); |
2117 | list_for_each_entry(kvm, &vm_list, vm_list) | 2126 | list_for_each_entry(kvm, &vm_list, vm_list) |
2118 | kvm_for_each_vcpu(i, vcpu, kvm) | 2127 | kvm_for_each_vcpu(i, vcpu, kvm) |
2119 | *val += *(u32 *)((void *)vcpu + offset); | 2128 | *val += *(u32 *)((void *)vcpu + offset); |
2120 | 2129 | ||
2121 | spin_unlock(&kvm_lock); | 2130 | spin_unlock(&kvm_lock); |
2122 | return 0; | 2131 | return 0; |
2123 | } | 2132 | } |
2124 | 2133 | ||
2125 | DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); | 2134 | DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); |
2126 | 2135 | ||
2127 | static const struct file_operations *stat_fops[] = { | 2136 | static const struct file_operations *stat_fops[] = { |
2128 | [KVM_STAT_VCPU] = &vcpu_stat_fops, | 2137 | [KVM_STAT_VCPU] = &vcpu_stat_fops, |
2129 | [KVM_STAT_VM] = &vm_stat_fops, | 2138 | [KVM_STAT_VM] = &vm_stat_fops, |
2130 | }; | 2139 | }; |
2131 | 2140 | ||
2132 | static void kvm_init_debug(void) | 2141 | static void kvm_init_debug(void) |
2133 | { | 2142 | { |
2134 | struct kvm_stats_debugfs_item *p; | 2143 | struct kvm_stats_debugfs_item *p; |
2135 | 2144 | ||
2136 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); | 2145 | kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); |
2137 | for (p = debugfs_entries; p->name; ++p) | 2146 | for (p = debugfs_entries; p->name; ++p) |
2138 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, | 2147 | p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, |
2139 | (void *)(long)p->offset, | 2148 | (void *)(long)p->offset, |
2140 | stat_fops[p->kind]); | 2149 | stat_fops[p->kind]); |
2141 | } | 2150 | } |
2142 | 2151 | ||
2143 | static void kvm_exit_debug(void) | 2152 | static void kvm_exit_debug(void) |
2144 | { | 2153 | { |
2145 | struct kvm_stats_debugfs_item *p; | 2154 | struct kvm_stats_debugfs_item *p; |
2146 | 2155 | ||
2147 | for (p = debugfs_entries; p->name; ++p) | 2156 | for (p = debugfs_entries; p->name; ++p) |
2148 | debugfs_remove(p->dentry); | 2157 | debugfs_remove(p->dentry); |
2149 | debugfs_remove(kvm_debugfs_dir); | 2158 | debugfs_remove(kvm_debugfs_dir); |
2150 | } | 2159 | } |
2151 | 2160 | ||
2152 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2161 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
2153 | { | 2162 | { |
2154 | if (kvm_usage_count) | 2163 | if (kvm_usage_count) |
2155 | hardware_disable(NULL); | 2164 | hardware_disable(NULL); |
2156 | return 0; | 2165 | return 0; |
2157 | } | 2166 | } |
2158 | 2167 | ||
2159 | static int kvm_resume(struct sys_device *dev) | 2168 | static int kvm_resume(struct sys_device *dev) |
2160 | { | 2169 | { |
2161 | if (kvm_usage_count) | 2170 | if (kvm_usage_count) |
2162 | hardware_enable(NULL); | 2171 | hardware_enable(NULL); |
2163 | return 0; | 2172 | return 0; |
2164 | } | 2173 | } |
2165 | 2174 | ||
2166 | static struct sysdev_class kvm_sysdev_class = { | 2175 | static struct sysdev_class kvm_sysdev_class = { |
2167 | .name = "kvm", | 2176 | .name = "kvm", |
2168 | .suspend = kvm_suspend, | 2177 | .suspend = kvm_suspend, |
2169 | .resume = kvm_resume, | 2178 | .resume = kvm_resume, |
2170 | }; | 2179 | }; |
2171 | 2180 | ||
2172 | static struct sys_device kvm_sysdev = { | 2181 | static struct sys_device kvm_sysdev = { |
2173 | .id = 0, | 2182 | .id = 0, |
2174 | .cls = &kvm_sysdev_class, | 2183 | .cls = &kvm_sysdev_class, |
2175 | }; | 2184 | }; |
2176 | 2185 | ||
2177 | struct page *bad_page; | 2186 | struct page *bad_page; |
2178 | pfn_t bad_pfn; | 2187 | pfn_t bad_pfn; |
2179 | 2188 | ||
2180 | static inline | 2189 | static inline |
2181 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 2190 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
2182 | { | 2191 | { |
2183 | return container_of(pn, struct kvm_vcpu, preempt_notifier); | 2192 | return container_of(pn, struct kvm_vcpu, preempt_notifier); |
2184 | } | 2193 | } |
2185 | 2194 | ||
2186 | static void kvm_sched_in(struct preempt_notifier *pn, int cpu) | 2195 | static void kvm_sched_in(struct preempt_notifier *pn, int cpu) |
2187 | { | 2196 | { |
2188 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | 2197 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); |
2189 | 2198 | ||
2190 | kvm_arch_vcpu_load(vcpu, cpu); | 2199 | kvm_arch_vcpu_load(vcpu, cpu); |
2191 | } | 2200 | } |
2192 | 2201 | ||
2193 | static void kvm_sched_out(struct preempt_notifier *pn, | 2202 | static void kvm_sched_out(struct preempt_notifier *pn, |
2194 | struct task_struct *next) | 2203 | struct task_struct *next) |
2195 | { | 2204 | { |
2196 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); | 2205 | struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); |
2197 | 2206 | ||
2198 | kvm_arch_vcpu_put(vcpu); | 2207 | kvm_arch_vcpu_put(vcpu); |
2199 | } | 2208 | } |
2200 | 2209 | ||
2201 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | 2210 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
2202 | struct module *module) | 2211 | struct module *module) |
2203 | { | 2212 | { |
2204 | int r; | 2213 | int r; |
2205 | int cpu; | 2214 | int cpu; |
2206 | 2215 | ||
2207 | r = kvm_arch_init(opaque); | 2216 | r = kvm_arch_init(opaque); |
2208 | if (r) | 2217 | if (r) |
2209 | goto out_fail; | 2218 | goto out_fail; |
2210 | 2219 | ||
2211 | bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 2220 | bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
2212 | 2221 | ||
2213 | if (bad_page == NULL) { | 2222 | if (bad_page == NULL) { |
2214 | r = -ENOMEM; | 2223 | r = -ENOMEM; |
2215 | goto out; | 2224 | goto out; |
2216 | } | 2225 | } |
2217 | 2226 | ||
2218 | bad_pfn = page_to_pfn(bad_page); | 2227 | bad_pfn = page_to_pfn(bad_page); |
2219 | 2228 | ||
2220 | hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 2229 | hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
2221 | 2230 | ||
2222 | if (hwpoison_page == NULL) { | 2231 | if (hwpoison_page == NULL) { |
2223 | r = -ENOMEM; | 2232 | r = -ENOMEM; |
2224 | goto out_free_0; | 2233 | goto out_free_0; |
2225 | } | 2234 | } |
2226 | 2235 | ||
2227 | hwpoison_pfn = page_to_pfn(hwpoison_page); | 2236 | hwpoison_pfn = page_to_pfn(hwpoison_page); |
2228 | 2237 | ||
2238 | fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2239 | |||
2240 | if (fault_page == NULL) { | ||
2241 | r = -ENOMEM; | ||
2242 | goto out_free_0; | ||
2243 | } | ||
2244 | |||
2245 | fault_pfn = page_to_pfn(fault_page); | ||
2246 | |||
2229 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { | 2247 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { |
2230 | r = -ENOMEM; | 2248 | r = -ENOMEM; |
2231 | goto out_free_0; | 2249 | goto out_free_0; |
2232 | } | 2250 | } |
2233 | 2251 | ||
2234 | r = kvm_arch_hardware_setup(); | 2252 | r = kvm_arch_hardware_setup(); |
2235 | if (r < 0) | 2253 | if (r < 0) |
2236 | goto out_free_0a; | 2254 | goto out_free_0a; |
2237 | 2255 | ||
2238 | for_each_online_cpu(cpu) { | 2256 | for_each_online_cpu(cpu) { |
2239 | smp_call_function_single(cpu, | 2257 | smp_call_function_single(cpu, |
2240 | kvm_arch_check_processor_compat, | 2258 | kvm_arch_check_processor_compat, |
2241 | &r, 1); | 2259 | &r, 1); |
2242 | if (r < 0) | 2260 | if (r < 0) |
2243 | goto out_free_1; | 2261 | goto out_free_1; |
2244 | } | 2262 | } |
2245 | 2263 | ||
2246 | r = register_cpu_notifier(&kvm_cpu_notifier); | 2264 | r = register_cpu_notifier(&kvm_cpu_notifier); |
2247 | if (r) | 2265 | if (r) |
2248 | goto out_free_2; | 2266 | goto out_free_2; |
2249 | register_reboot_notifier(&kvm_reboot_notifier); | 2267 | register_reboot_notifier(&kvm_reboot_notifier); |
2250 | 2268 | ||
2251 | r = sysdev_class_register(&kvm_sysdev_class); | 2269 | r = sysdev_class_register(&kvm_sysdev_class); |
2252 | if (r) | 2270 | if (r) |
2253 | goto out_free_3; | 2271 | goto out_free_3; |
2254 | 2272 | ||
2255 | r = sysdev_register(&kvm_sysdev); | 2273 | r = sysdev_register(&kvm_sysdev); |
2256 | if (r) | 2274 | if (r) |
2257 | goto out_free_4; | 2275 | goto out_free_4; |
2258 | 2276 | ||
2259 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ | 2277 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ |
2260 | if (!vcpu_align) | 2278 | if (!vcpu_align) |
2261 | vcpu_align = __alignof__(struct kvm_vcpu); | 2279 | vcpu_align = __alignof__(struct kvm_vcpu); |
2262 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, | 2280 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, |
2263 | 0, NULL); | 2281 | 0, NULL); |
2264 | if (!kvm_vcpu_cache) { | 2282 | if (!kvm_vcpu_cache) { |
2265 | r = -ENOMEM; | 2283 | r = -ENOMEM; |
2266 | goto out_free_5; | 2284 | goto out_free_5; |
2267 | } | 2285 | } |
2268 | 2286 | ||
2269 | kvm_chardev_ops.owner = module; | 2287 | kvm_chardev_ops.owner = module; |
2270 | kvm_vm_fops.owner = module; | 2288 | kvm_vm_fops.owner = module; |
2271 | kvm_vcpu_fops.owner = module; | 2289 | kvm_vcpu_fops.owner = module; |
2272 | 2290 | ||
2273 | r = misc_register(&kvm_dev); | 2291 | r = misc_register(&kvm_dev); |
2274 | if (r) { | 2292 | if (r) { |
2275 | printk(KERN_ERR "kvm: misc device register failed\n"); | 2293 | printk(KERN_ERR "kvm: misc device register failed\n"); |
2276 | goto out_free; | 2294 | goto out_free; |
2277 | } | 2295 | } |
2278 | 2296 | ||
2279 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2297 | kvm_preempt_ops.sched_in = kvm_sched_in; |
2280 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2298 | kvm_preempt_ops.sched_out = kvm_sched_out; |
2281 | 2299 | ||
2282 | kvm_init_debug(); | 2300 | kvm_init_debug(); |
2283 | 2301 | ||
2284 | return 0; | 2302 | return 0; |
2285 | 2303 | ||
2286 | out_free: | 2304 | out_free: |
2287 | kmem_cache_destroy(kvm_vcpu_cache); | 2305 | kmem_cache_destroy(kvm_vcpu_cache); |
2288 | out_free_5: | 2306 | out_free_5: |
2289 | sysdev_unregister(&kvm_sysdev); | 2307 | sysdev_unregister(&kvm_sysdev); |
2290 | out_free_4: | 2308 | out_free_4: |
2291 | sysdev_class_unregister(&kvm_sysdev_class); | 2309 | sysdev_class_unregister(&kvm_sysdev_class); |
2292 | out_free_3: | 2310 | out_free_3: |
2293 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2311 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2294 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2312 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2295 | out_free_2: | 2313 | out_free_2: |
2296 | out_free_1: | 2314 | out_free_1: |
2297 | kvm_arch_hardware_unsetup(); | 2315 | kvm_arch_hardware_unsetup(); |
2298 | out_free_0a: | 2316 | out_free_0a: |
2299 | free_cpumask_var(cpus_hardware_enabled); | 2317 | free_cpumask_var(cpus_hardware_enabled); |
2300 | out_free_0: | 2318 | out_free_0: |
2319 | if (fault_page) | ||
2320 | __free_page(fault_page); | ||
2301 | if (hwpoison_page) | 2321 | if (hwpoison_page) |
2302 | __free_page(hwpoison_page); | 2322 | __free_page(hwpoison_page); |
2303 | __free_page(bad_page); | 2323 | __free_page(bad_page); |
2304 | out: | 2324 | out: |
2305 | kvm_arch_exit(); | 2325 | kvm_arch_exit(); |
2306 | out_fail: | 2326 | out_fail: |
2307 | return r; | 2327 | return r; |
2308 | } | 2328 | } |
2309 | EXPORT_SYMBOL_GPL(kvm_init); | 2329 | EXPORT_SYMBOL_GPL(kvm_init); |
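kvm_init() now sets up a third sentinel pair, fault_page/fault_pfn, alongside bad_page/bad_pfn and hwpoison_page/hwpoison_pfn. A sentinel pfn of this kind is typically compared against the result of a gfn lookup so the caller can turn the condition into an error code. The sketch below shows that shape; the helper names is_fault_pfn() and example_map_gfn() are assumptions for illustration, since only fault_page and fault_pfn themselves appear in the hunk above.

#include <linux/kvm_host.h>     /* pfn_t, gfn_t, gfn_to_pfn() */

extern pfn_t fault_pfn;         /* the sentinel initialised in kvm_init() above */

static inline bool is_fault_pfn(pfn_t pfn)      /* assumed helper name */
{
        return pfn == fault_pfn;
}

static int example_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
        pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);

        if (is_fault_pfn(pfn))          /* slot exists but is not backed by a usable page */
                return -EFAULT;         /* report the failure to the caller */
        /* ... otherwise install the mapping as usual ... */
        return 0;
}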
2310 | 2330 | ||
2311 | void kvm_exit(void) | 2331 | void kvm_exit(void) |
2312 | { | 2332 | { |
2313 | kvm_exit_debug(); | 2333 | kvm_exit_debug(); |
2314 | misc_deregister(&kvm_dev); | 2334 | misc_deregister(&kvm_dev); |
2315 | kmem_cache_destroy(kvm_vcpu_cache); | 2335 | kmem_cache_destroy(kvm_vcpu_cache); |
2316 | sysdev_unregister(&kvm_sysdev); | 2336 | sysdev_unregister(&kvm_sysdev); |
2317 | sysdev_class_unregister(&kvm_sysdev_class); | 2337 | sysdev_class_unregister(&kvm_sysdev_class); |
2318 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2338 | unregister_reboot_notifier(&kvm_reboot_notifier); |
2319 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2339 | unregister_cpu_notifier(&kvm_cpu_notifier); |
2320 | on_each_cpu(hardware_disable, NULL, 1); | 2340 | on_each_cpu(hardware_disable, NULL, 1); |
2321 | kvm_arch_hardware_unsetup(); | 2341 | kvm_arch_hardware_unsetup(); |
2322 | kvm_arch_exit(); | 2342 | kvm_arch_exit(); |
2323 | free_cpumask_var(cpus_hardware_enabled); | 2343 | free_cpumask_var(cpus_hardware_enabled); |
2324 | __free_page(hwpoison_page); | 2344 | __free_page(hwpoison_page); |
2325 | __free_page(bad_page); | 2345 | __free_page(bad_page); |
2326 | } | 2346 | } |
2327 | EXPORT_SYMBOL_GPL(kvm_exit); | 2347 | EXPORT_SYMBOL_GPL(kvm_exit); |
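kvm_init() unwinds with a ladder of goto labels (out_free through out_fail) so that a failure at any step frees only what was already set up, and kvm_exit() tears everything down in roughly the reverse order of initialization. A minimal, self-contained sketch of that pattern, reduced to two heap allocations with purely illustrative names, follows.

#include <stdlib.h>

/* Sketch only: the goto-based unwind pattern kvm_init() uses, shrunk
 * to two resources.  Every name here is illustrative. */
static void *res_a, *res_b;

static int example_init(void)
{
        int r = -1;

        res_a = malloc(32);             /* first resource */
        if (!res_a)
                goto out;               /* nothing to undo yet */

        res_b = malloc(32);             /* second resource */
        if (!res_b)
                goto out_free_a;        /* undo only what already succeeded */

        return 0;

out_free_a:
        free(res_a);                    /* labels run from the failure point downward */
out:
        return r;
}

static void example_exit(void)
{
        /* Teardown mirrors init in reverse order, like kvm_exit() above. */
        free(res_b);
        free(res_a);
}

int main(void)
{
        if (example_init() == 0)
                example_exit();
        return 0;
}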
2328 | 2348 |