Commit 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d

Authored by Takuya Yoshikawa
Committed by Avi Kivity
1 parent 77662e0028

KVM: fix the handling of dirty bitmaps to avoid overflows

An int is not large enough to store the size of a dirty bitmap.

This patch fixes the problem by introducing a wrapper function that
calculates the sizes of dirty bitmaps.

Note: in mark_page_dirty(), we have to account for the fact that
  __set_bit() takes the offset as an int, not a long.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
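
For readers following the fix, here is a minimal sketch of the idea described
above (the helper names, the ALIGN-based arithmetic and the includes are
illustrative assumptions, not the verbatim patch): the bitmap size is computed
in unsigned long arithmetic in one place, and mark_page_dirty() only ever
passes __set_bit() a within-word offset, which always fits in an int.

    #include <linux/kernel.h>   /* ALIGN() */
    #include <linux/bitops.h>   /* BITS_PER_LONG, __set_bit() */

    /* Bytes needed for a dirty bitmap covering npages guest pages. */
    static unsigned long dirty_bitmap_bytes(unsigned long npages)
    {
            /* one bit per page, rounded up to a whole number of longs */
            return ALIGN(npages, BITS_PER_LONG) / 8;
    }

    /* Mark page rel_gfn (relative to the slot base) dirty in the bitmap. */
    static void set_dirty_bit(unsigned long *dirty_bitmap, unsigned long rel_gfn)
    {
            unsigned long *word = dirty_bitmap + rel_gfn / BITS_PER_LONG;
            int offset = rel_gfn % BITS_PER_LONG;   /* always < BITS_PER_LONG */

            __set_bit(offset, word);
    }

The point of the split is that nothing wider than a within-word offset is ever
narrowed to an int; the bitmap size itself stays in an unsigned long.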

Showing 5 changed files with 24 additions and 13 deletions

arch/ia64/kvm/kvm-ia64.c
1 /* 1 /*
2 * kvm_ia64.c: Basic KVM suppport On Itanium series processors 2 * kvm_ia64.c: Basic KVM suppport On Itanium series processors
3 * 3 *
4 * 4 *
5 * Copyright (C) 2007, Intel Corporation. 5 * Copyright (C) 2007, Intel Corporation.
6 * Xiantao Zhang (xiantao.zhang@intel.com) 6 * Xiantao Zhang (xiantao.zhang@intel.com)
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License, 9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation. 10 * version 2, as published by the Free Software Foundation.
11 * 11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT 12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details. 15 * more details.
16 * 16 *
17 * You should have received a copy of the GNU General Public License along with 17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 18 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 * Place - Suite 330, Boston, MA 02111-1307 USA. 19 * Place - Suite 330, Boston, MA 02111-1307 USA.
20 * 20 *
21 */ 21 */
22 22
23 #include <linux/module.h> 23 #include <linux/module.h>
24 #include <linux/errno.h> 24 #include <linux/errno.h>
25 #include <linux/percpu.h> 25 #include <linux/percpu.h>
26 #include <linux/fs.h> 26 #include <linux/fs.h>
27 #include <linux/slab.h> 27 #include <linux/slab.h>
28 #include <linux/smp.h> 28 #include <linux/smp.h>
29 #include <linux/kvm_host.h> 29 #include <linux/kvm_host.h>
30 #include <linux/kvm.h> 30 #include <linux/kvm.h>
31 #include <linux/bitops.h> 31 #include <linux/bitops.h>
32 #include <linux/hrtimer.h> 32 #include <linux/hrtimer.h>
33 #include <linux/uaccess.h> 33 #include <linux/uaccess.h>
34 #include <linux/iommu.h> 34 #include <linux/iommu.h>
35 #include <linux/intel-iommu.h> 35 #include <linux/intel-iommu.h>
36 36
37 #include <asm/pgtable.h> 37 #include <asm/pgtable.h>
38 #include <asm/gcc_intrin.h> 38 #include <asm/gcc_intrin.h>
39 #include <asm/pal.h> 39 #include <asm/pal.h>
40 #include <asm/cacheflush.h> 40 #include <asm/cacheflush.h>
41 #include <asm/div64.h> 41 #include <asm/div64.h>
42 #include <asm/tlb.h> 42 #include <asm/tlb.h>
43 #include <asm/elf.h> 43 #include <asm/elf.h>
44 #include <asm/sn/addrs.h> 44 #include <asm/sn/addrs.h>
45 #include <asm/sn/clksupport.h> 45 #include <asm/sn/clksupport.h>
46 #include <asm/sn/shub_mmr.h> 46 #include <asm/sn/shub_mmr.h>
47 47
48 #include "misc.h" 48 #include "misc.h"
49 #include "vti.h" 49 #include "vti.h"
50 #include "iodev.h" 50 #include "iodev.h"
51 #include "ioapic.h" 51 #include "ioapic.h"
52 #include "lapic.h" 52 #include "lapic.h"
53 #include "irq.h" 53 #include "irq.h"
54 54
55 static unsigned long kvm_vmm_base; 55 static unsigned long kvm_vmm_base;
56 static unsigned long kvm_vsa_base; 56 static unsigned long kvm_vsa_base;
57 static unsigned long kvm_vm_buffer; 57 static unsigned long kvm_vm_buffer;
58 static unsigned long kvm_vm_buffer_size; 58 static unsigned long kvm_vm_buffer_size;
59 unsigned long kvm_vmm_gp; 59 unsigned long kvm_vmm_gp;
60 60
61 static long vp_env_info; 61 static long vp_env_info;
62 62
63 static struct kvm_vmm_info *kvm_vmm_info; 63 static struct kvm_vmm_info *kvm_vmm_info;
64 64
65 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); 65 static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
66 66
67 struct kvm_stats_debugfs_item debugfs_entries[] = { 67 struct kvm_stats_debugfs_item debugfs_entries[] = {
68 { NULL } 68 { NULL }
69 }; 69 };
70 70
71 static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) 71 static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
72 { 72 {
73 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) 73 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
74 if (vcpu->kvm->arch.is_sn2) 74 if (vcpu->kvm->arch.is_sn2)
75 return rtc_time(); 75 return rtc_time();
76 else 76 else
77 #endif 77 #endif
78 return ia64_getreg(_IA64_REG_AR_ITC); 78 return ia64_getreg(_IA64_REG_AR_ITC);
79 } 79 }
80 80
81 static void kvm_flush_icache(unsigned long start, unsigned long len) 81 static void kvm_flush_icache(unsigned long start, unsigned long len)
82 { 82 {
83 int l; 83 int l;
84 84
85 for (l = 0; l < (len + 32); l += 32) 85 for (l = 0; l < (len + 32); l += 32)
86 ia64_fc((void *)(start + l)); 86 ia64_fc((void *)(start + l));
87 87
88 ia64_sync_i(); 88 ia64_sync_i();
89 ia64_srlz_i(); 89 ia64_srlz_i();
90 } 90 }
91 91
92 static void kvm_flush_tlb_all(void) 92 static void kvm_flush_tlb_all(void)
93 { 93 {
94 unsigned long i, j, count0, count1, stride0, stride1, addr; 94 unsigned long i, j, count0, count1, stride0, stride1, addr;
95 long flags; 95 long flags;
96 96
97 addr = local_cpu_data->ptce_base; 97 addr = local_cpu_data->ptce_base;
98 count0 = local_cpu_data->ptce_count[0]; 98 count0 = local_cpu_data->ptce_count[0];
99 count1 = local_cpu_data->ptce_count[1]; 99 count1 = local_cpu_data->ptce_count[1];
100 stride0 = local_cpu_data->ptce_stride[0]; 100 stride0 = local_cpu_data->ptce_stride[0];
101 stride1 = local_cpu_data->ptce_stride[1]; 101 stride1 = local_cpu_data->ptce_stride[1];
102 102
103 local_irq_save(flags); 103 local_irq_save(flags);
104 for (i = 0; i < count0; ++i) { 104 for (i = 0; i < count0; ++i) {
105 for (j = 0; j < count1; ++j) { 105 for (j = 0; j < count1; ++j) {
106 ia64_ptce(addr); 106 ia64_ptce(addr);
107 addr += stride1; 107 addr += stride1;
108 } 108 }
109 addr += stride0; 109 addr += stride0;
110 } 110 }
111 local_irq_restore(flags); 111 local_irq_restore(flags);
112 ia64_srlz_i(); /* srlz.i implies srlz.d */ 112 ia64_srlz_i(); /* srlz.i implies srlz.d */
113 } 113 }
114 114
115 long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) 115 long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
116 { 116 {
117 struct ia64_pal_retval iprv; 117 struct ia64_pal_retval iprv;
118 118
119 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, 119 PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
120 (u64)opt_handler); 120 (u64)opt_handler);
121 121
122 return iprv.status; 122 return iprv.status;
123 } 123 }
124 124
125 static DEFINE_SPINLOCK(vp_lock); 125 static DEFINE_SPINLOCK(vp_lock);
126 126
127 int kvm_arch_hardware_enable(void *garbage) 127 int kvm_arch_hardware_enable(void *garbage)
128 { 128 {
129 long status; 129 long status;
130 long tmp_base; 130 long tmp_base;
131 unsigned long pte; 131 unsigned long pte;
132 unsigned long saved_psr; 132 unsigned long saved_psr;
133 int slot; 133 int slot;
134 134
135 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); 135 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
136 local_irq_save(saved_psr); 136 local_irq_save(saved_psr);
137 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 137 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
138 local_irq_restore(saved_psr); 138 local_irq_restore(saved_psr);
139 if (slot < 0) 139 if (slot < 0)
140 return -EINVAL; 140 return -EINVAL;
141 141
142 spin_lock(&vp_lock); 142 spin_lock(&vp_lock);
143 status = ia64_pal_vp_init_env(kvm_vsa_base ? 143 status = ia64_pal_vp_init_env(kvm_vsa_base ?
144 VP_INIT_ENV : VP_INIT_ENV_INITALIZE, 144 VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
145 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); 145 __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
146 if (status != 0) { 146 if (status != 0) {
147 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); 147 printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
148 return -EINVAL; 148 return -EINVAL;
149 } 149 }
150 150
151 if (!kvm_vsa_base) { 151 if (!kvm_vsa_base) {
152 kvm_vsa_base = tmp_base; 152 kvm_vsa_base = tmp_base;
153 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); 153 printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
154 } 154 }
155 spin_unlock(&vp_lock); 155 spin_unlock(&vp_lock);
156 ia64_ptr_entry(0x3, slot); 156 ia64_ptr_entry(0x3, slot);
157 157
158 return 0; 158 return 0;
159 } 159 }
160 160
161 void kvm_arch_hardware_disable(void *garbage) 161 void kvm_arch_hardware_disable(void *garbage)
162 { 162 {
163 163
164 long status; 164 long status;
165 int slot; 165 int slot;
166 unsigned long pte; 166 unsigned long pte;
167 unsigned long saved_psr; 167 unsigned long saved_psr;
168 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); 168 unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
169 169
170 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), 170 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
171 PAGE_KERNEL)); 171 PAGE_KERNEL));
172 172
173 local_irq_save(saved_psr); 173 local_irq_save(saved_psr);
174 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 174 slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
175 local_irq_restore(saved_psr); 175 local_irq_restore(saved_psr);
176 if (slot < 0) 176 if (slot < 0)
177 return; 177 return;
178 178
179 status = ia64_pal_vp_exit_env(host_iva); 179 status = ia64_pal_vp_exit_env(host_iva);
180 if (status) 180 if (status)
181 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", 181 printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
182 status); 182 status);
183 ia64_ptr_entry(0x3, slot); 183 ia64_ptr_entry(0x3, slot);
184 } 184 }
185 185
186 void kvm_arch_check_processor_compat(void *rtn) 186 void kvm_arch_check_processor_compat(void *rtn)
187 { 187 {
188 *(int *)rtn = 0; 188 *(int *)rtn = 0;
189 } 189 }
190 190
191 int kvm_dev_ioctl_check_extension(long ext) 191 int kvm_dev_ioctl_check_extension(long ext)
192 { 192 {
193 193
194 int r; 194 int r;
195 195
196 switch (ext) { 196 switch (ext) {
197 case KVM_CAP_IRQCHIP: 197 case KVM_CAP_IRQCHIP:
198 case KVM_CAP_MP_STATE: 198 case KVM_CAP_MP_STATE:
199 case KVM_CAP_IRQ_INJECT_STATUS: 199 case KVM_CAP_IRQ_INJECT_STATUS:
200 r = 1; 200 r = 1;
201 break; 201 break;
202 case KVM_CAP_COALESCED_MMIO: 202 case KVM_CAP_COALESCED_MMIO:
203 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 203 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
204 break; 204 break;
205 case KVM_CAP_IOMMU: 205 case KVM_CAP_IOMMU:
206 r = iommu_found(); 206 r = iommu_found();
207 break; 207 break;
208 default: 208 default:
209 r = 0; 209 r = 0;
210 } 210 }
211 return r; 211 return r;
212 212
213 } 213 }
214 214
215 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 215 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
216 { 216 {
217 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 217 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
218 kvm_run->hw.hardware_exit_reason = 1; 218 kvm_run->hw.hardware_exit_reason = 1;
219 return 0; 219 return 0;
220 } 220 }
221 221
222 static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 222 static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
223 { 223 {
224 struct kvm_mmio_req *p; 224 struct kvm_mmio_req *p;
225 struct kvm_io_device *mmio_dev; 225 struct kvm_io_device *mmio_dev;
226 int r; 226 int r;
227 227
228 p = kvm_get_vcpu_ioreq(vcpu); 228 p = kvm_get_vcpu_ioreq(vcpu);
229 229
230 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) 230 if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
231 goto mmio; 231 goto mmio;
232 vcpu->mmio_needed = 1; 232 vcpu->mmio_needed = 1;
233 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; 233 vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
234 vcpu->mmio_size = kvm_run->mmio.len = p->size; 234 vcpu->mmio_size = kvm_run->mmio.len = p->size;
235 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; 235 vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
236 236
237 if (vcpu->mmio_is_write) 237 if (vcpu->mmio_is_write)
238 memcpy(vcpu->mmio_data, &p->data, p->size); 238 memcpy(vcpu->mmio_data, &p->data, p->size);
239 memcpy(kvm_run->mmio.data, &p->data, p->size); 239 memcpy(kvm_run->mmio.data, &p->data, p->size);
240 kvm_run->exit_reason = KVM_EXIT_MMIO; 240 kvm_run->exit_reason = KVM_EXIT_MMIO;
241 return 0; 241 return 0;
242 mmio: 242 mmio:
243 if (p->dir) 243 if (p->dir)
244 r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, 244 r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
245 p->size, &p->data); 245 p->size, &p->data);
246 else 246 else
247 r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, 247 r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
248 p->size, &p->data); 248 p->size, &p->data);
249 if (r) 249 if (r)
250 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); 250 printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
251 p->state = STATE_IORESP_READY; 251 p->state = STATE_IORESP_READY;
252 252
253 return 1; 253 return 1;
254 } 254 }
255 255
256 static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 256 static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
257 { 257 {
258 struct exit_ctl_data *p; 258 struct exit_ctl_data *p;
259 259
260 p = kvm_get_exit_data(vcpu); 260 p = kvm_get_exit_data(vcpu);
261 261
262 if (p->exit_reason == EXIT_REASON_PAL_CALL) 262 if (p->exit_reason == EXIT_REASON_PAL_CALL)
263 return kvm_pal_emul(vcpu, kvm_run); 263 return kvm_pal_emul(vcpu, kvm_run);
264 else { 264 else {
265 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 265 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
266 kvm_run->hw.hardware_exit_reason = 2; 266 kvm_run->hw.hardware_exit_reason = 2;
267 return 0; 267 return 0;
268 } 268 }
269 } 269 }
270 270
271 static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 271 static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
272 { 272 {
273 struct exit_ctl_data *p; 273 struct exit_ctl_data *p;
274 274
275 p = kvm_get_exit_data(vcpu); 275 p = kvm_get_exit_data(vcpu);
276 276
277 if (p->exit_reason == EXIT_REASON_SAL_CALL) { 277 if (p->exit_reason == EXIT_REASON_SAL_CALL) {
278 kvm_sal_emul(vcpu); 278 kvm_sal_emul(vcpu);
279 return 1; 279 return 1;
280 } else { 280 } else {
281 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 281 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
282 kvm_run->hw.hardware_exit_reason = 3; 282 kvm_run->hw.hardware_exit_reason = 3;
283 return 0; 283 return 0;
284 } 284 }
285 285
286 } 286 }
287 287
288 static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) 288 static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
289 { 289 {
290 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 290 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
291 291
292 if (!test_and_set_bit(vector, &vpd->irr[0])) { 292 if (!test_and_set_bit(vector, &vpd->irr[0])) {
293 vcpu->arch.irq_new_pending = 1; 293 vcpu->arch.irq_new_pending = 1;
294 kvm_vcpu_kick(vcpu); 294 kvm_vcpu_kick(vcpu);
295 return 1; 295 return 1;
296 } 296 }
297 return 0; 297 return 0;
298 } 298 }
299 299
300 /* 300 /*
301 * offset: address offset to IPI space. 301 * offset: address offset to IPI space.
302 * value: deliver value. 302 * value: deliver value.
303 */ 303 */
304 static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, 304 static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
305 uint64_t vector) 305 uint64_t vector)
306 { 306 {
307 switch (dm) { 307 switch (dm) {
308 case SAPIC_FIXED: 308 case SAPIC_FIXED:
309 break; 309 break;
310 case SAPIC_NMI: 310 case SAPIC_NMI:
311 vector = 2; 311 vector = 2;
312 break; 312 break;
313 case SAPIC_EXTINT: 313 case SAPIC_EXTINT:
314 vector = 0; 314 vector = 0;
315 break; 315 break;
316 case SAPIC_INIT: 316 case SAPIC_INIT:
317 case SAPIC_PMI: 317 case SAPIC_PMI:
318 default: 318 default:
319 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); 319 printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
320 return; 320 return;
321 } 321 }
322 __apic_accept_irq(vcpu, vector); 322 __apic_accept_irq(vcpu, vector);
323 } 323 }
324 324
325 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, 325 static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
326 unsigned long eid) 326 unsigned long eid)
327 { 327 {
328 union ia64_lid lid; 328 union ia64_lid lid;
329 int i; 329 int i;
330 struct kvm_vcpu *vcpu; 330 struct kvm_vcpu *vcpu;
331 331
332 kvm_for_each_vcpu(i, vcpu, kvm) { 332 kvm_for_each_vcpu(i, vcpu, kvm) {
333 lid.val = VCPU_LID(vcpu); 333 lid.val = VCPU_LID(vcpu);
334 if (lid.id == id && lid.eid == eid) 334 if (lid.id == id && lid.eid == eid)
335 return vcpu; 335 return vcpu;
336 } 336 }
337 337
338 return NULL; 338 return NULL;
339 } 339 }
340 340
341 static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 341 static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
342 { 342 {
343 struct exit_ctl_data *p = kvm_get_exit_data(vcpu); 343 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
344 struct kvm_vcpu *target_vcpu; 344 struct kvm_vcpu *target_vcpu;
345 struct kvm_pt_regs *regs; 345 struct kvm_pt_regs *regs;
346 union ia64_ipi_a addr = p->u.ipi_data.addr; 346 union ia64_ipi_a addr = p->u.ipi_data.addr;
347 union ia64_ipi_d data = p->u.ipi_data.data; 347 union ia64_ipi_d data = p->u.ipi_data.data;
348 348
349 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); 349 target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
350 if (!target_vcpu) 350 if (!target_vcpu)
351 return handle_vm_error(vcpu, kvm_run); 351 return handle_vm_error(vcpu, kvm_run);
352 352
353 if (!target_vcpu->arch.launched) { 353 if (!target_vcpu->arch.launched) {
354 regs = vcpu_regs(target_vcpu); 354 regs = vcpu_regs(target_vcpu);
355 355
356 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; 356 regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
357 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; 357 regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
358 358
359 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 359 target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
360 if (waitqueue_active(&target_vcpu->wq)) 360 if (waitqueue_active(&target_vcpu->wq))
361 wake_up_interruptible(&target_vcpu->wq); 361 wake_up_interruptible(&target_vcpu->wq);
362 } else { 362 } else {
363 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); 363 vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
364 if (target_vcpu != vcpu) 364 if (target_vcpu != vcpu)
365 kvm_vcpu_kick(target_vcpu); 365 kvm_vcpu_kick(target_vcpu);
366 } 366 }
367 367
368 return 1; 368 return 1;
369 } 369 }
370 370
371 struct call_data { 371 struct call_data {
372 struct kvm_ptc_g ptc_g_data; 372 struct kvm_ptc_g ptc_g_data;
373 struct kvm_vcpu *vcpu; 373 struct kvm_vcpu *vcpu;
374 }; 374 };
375 375
376 static void vcpu_global_purge(void *info) 376 static void vcpu_global_purge(void *info)
377 { 377 {
378 struct call_data *p = (struct call_data *)info; 378 struct call_data *p = (struct call_data *)info;
379 struct kvm_vcpu *vcpu = p->vcpu; 379 struct kvm_vcpu *vcpu = p->vcpu;
380 380
381 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 381 if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
382 return; 382 return;
383 383
384 set_bit(KVM_REQ_PTC_G, &vcpu->requests); 384 set_bit(KVM_REQ_PTC_G, &vcpu->requests);
385 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { 385 if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
386 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = 386 vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
387 p->ptc_g_data; 387 p->ptc_g_data;
388 } else { 388 } else {
389 clear_bit(KVM_REQ_PTC_G, &vcpu->requests); 389 clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
390 vcpu->arch.ptc_g_count = 0; 390 vcpu->arch.ptc_g_count = 0;
391 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); 391 set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
392 } 392 }
393 } 393 }
394 394
395 static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 395 static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
396 { 396 {
397 struct exit_ctl_data *p = kvm_get_exit_data(vcpu); 397 struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
398 struct kvm *kvm = vcpu->kvm; 398 struct kvm *kvm = vcpu->kvm;
399 struct call_data call_data; 399 struct call_data call_data;
400 int i; 400 int i;
401 struct kvm_vcpu *vcpui; 401 struct kvm_vcpu *vcpui;
402 402
403 call_data.ptc_g_data = p->u.ptc_g_data; 403 call_data.ptc_g_data = p->u.ptc_g_data;
404 404
405 kvm_for_each_vcpu(i, vcpui, kvm) { 405 kvm_for_each_vcpu(i, vcpui, kvm) {
406 if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED || 406 if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
407 vcpu == vcpui) 407 vcpu == vcpui)
408 continue; 408 continue;
409 409
410 if (waitqueue_active(&vcpui->wq)) 410 if (waitqueue_active(&vcpui->wq))
411 wake_up_interruptible(&vcpui->wq); 411 wake_up_interruptible(&vcpui->wq);
412 412
413 if (vcpui->cpu != -1) { 413 if (vcpui->cpu != -1) {
414 call_data.vcpu = vcpui; 414 call_data.vcpu = vcpui;
415 smp_call_function_single(vcpui->cpu, 415 smp_call_function_single(vcpui->cpu,
416 vcpu_global_purge, &call_data, 1); 416 vcpu_global_purge, &call_data, 1);
417 } else 417 } else
418 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); 418 printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
419 419
420 } 420 }
421 return 1; 421 return 1;
422 } 422 }
423 423
424 static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 424 static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
425 { 425 {
426 return 1; 426 return 1;
427 } 427 }
428 428
429 static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) 429 static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
430 { 430 {
431 unsigned long pte, rtc_phys_addr, map_addr; 431 unsigned long pte, rtc_phys_addr, map_addr;
432 int slot; 432 int slot;
433 433
434 map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); 434 map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
435 rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; 435 rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
436 pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); 436 pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
437 slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); 437 slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
438 vcpu->arch.sn_rtc_tr_slot = slot; 438 vcpu->arch.sn_rtc_tr_slot = slot;
439 if (slot < 0) { 439 if (slot < 0) {
440 printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n"); 440 printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
441 slot = 0; 441 slot = 0;
442 } 442 }
443 return slot; 443 return slot;
444 } 444 }
445 445
446 int kvm_emulate_halt(struct kvm_vcpu *vcpu) 446 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
447 { 447 {
448 448
449 ktime_t kt; 449 ktime_t kt;
450 long itc_diff; 450 long itc_diff;
451 unsigned long vcpu_now_itc; 451 unsigned long vcpu_now_itc;
452 unsigned long expires; 452 unsigned long expires;
453 struct hrtimer *p_ht = &vcpu->arch.hlt_timer; 453 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
454 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; 454 unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
455 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 455 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
456 456
457 if (irqchip_in_kernel(vcpu->kvm)) { 457 if (irqchip_in_kernel(vcpu->kvm)) {
458 458
459 vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; 459 vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
460 460
461 if (time_after(vcpu_now_itc, vpd->itm)) { 461 if (time_after(vcpu_now_itc, vpd->itm)) {
462 vcpu->arch.timer_check = 1; 462 vcpu->arch.timer_check = 1;
463 return 1; 463 return 1;
464 } 464 }
465 itc_diff = vpd->itm - vcpu_now_itc; 465 itc_diff = vpd->itm - vcpu_now_itc;
466 if (itc_diff < 0) 466 if (itc_diff < 0)
467 itc_diff = -itc_diff; 467 itc_diff = -itc_diff;
468 468
469 expires = div64_u64(itc_diff, cyc_per_usec); 469 expires = div64_u64(itc_diff, cyc_per_usec);
470 kt = ktime_set(0, 1000 * expires); 470 kt = ktime_set(0, 1000 * expires);
471 471
472 vcpu->arch.ht_active = 1; 472 vcpu->arch.ht_active = 1;
473 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); 473 hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
474 474
475 vcpu->arch.mp_state = KVM_MP_STATE_HALTED; 475 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
476 kvm_vcpu_block(vcpu); 476 kvm_vcpu_block(vcpu);
477 hrtimer_cancel(p_ht); 477 hrtimer_cancel(p_ht);
478 vcpu->arch.ht_active = 0; 478 vcpu->arch.ht_active = 0;
479 479
480 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || 480 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
481 kvm_cpu_has_pending_timer(vcpu)) 481 kvm_cpu_has_pending_timer(vcpu))
482 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) 482 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
483 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 483 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
484 484
485 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) 485 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
486 return -EINTR; 486 return -EINTR;
487 return 1; 487 return 1;
488 } else { 488 } else {
489 printk(KERN_ERR"kvm: Unsupported userspace halt!"); 489 printk(KERN_ERR"kvm: Unsupported userspace halt!");
490 return 0; 490 return 0;
491 } 491 }
492 } 492 }
493 493
494 static int handle_vm_shutdown(struct kvm_vcpu *vcpu, 494 static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
495 struct kvm_run *kvm_run) 495 struct kvm_run *kvm_run)
496 { 496 {
497 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 497 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
498 return 0; 498 return 0;
499 } 499 }
500 500
501 static int handle_external_interrupt(struct kvm_vcpu *vcpu, 501 static int handle_external_interrupt(struct kvm_vcpu *vcpu,
502 struct kvm_run *kvm_run) 502 struct kvm_run *kvm_run)
503 { 503 {
504 return 1; 504 return 1;
505 } 505 }
506 506
507 static int handle_vcpu_debug(struct kvm_vcpu *vcpu, 507 static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
508 struct kvm_run *kvm_run) 508 struct kvm_run *kvm_run)
509 { 509 {
510 printk("VMM: %s", vcpu->arch.log_buf); 510 printk("VMM: %s", vcpu->arch.log_buf);
511 return 1; 511 return 1;
512 } 512 }
513 513
514 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, 514 static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
515 struct kvm_run *kvm_run) = { 515 struct kvm_run *kvm_run) = {
516 [EXIT_REASON_VM_PANIC] = handle_vm_error, 516 [EXIT_REASON_VM_PANIC] = handle_vm_error,
517 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, 517 [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio,
518 [EXIT_REASON_PAL_CALL] = handle_pal_call, 518 [EXIT_REASON_PAL_CALL] = handle_pal_call,
519 [EXIT_REASON_SAL_CALL] = handle_sal_call, 519 [EXIT_REASON_SAL_CALL] = handle_sal_call,
520 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, 520 [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6,
521 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, 521 [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown,
522 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 522 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
523 [EXIT_REASON_IPI] = handle_ipi, 523 [EXIT_REASON_IPI] = handle_ipi,
524 [EXIT_REASON_PTC_G] = handle_global_purge, 524 [EXIT_REASON_PTC_G] = handle_global_purge,
525 [EXIT_REASON_DEBUG] = handle_vcpu_debug, 525 [EXIT_REASON_DEBUG] = handle_vcpu_debug,
526 526
527 }; 527 };
528 528
529 static const int kvm_vti_max_exit_handlers = 529 static const int kvm_vti_max_exit_handlers =
530 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); 530 sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
531 531
532 static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) 532 static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
533 { 533 {
534 struct exit_ctl_data *p_exit_data; 534 struct exit_ctl_data *p_exit_data;
535 535
536 p_exit_data = kvm_get_exit_data(vcpu); 536 p_exit_data = kvm_get_exit_data(vcpu);
537 return p_exit_data->exit_reason; 537 return p_exit_data->exit_reason;
538 } 538 }
539 539
540 /* 540 /*
541 * The guest has exited. See if we can fix it or if we need userspace 541 * The guest has exited. See if we can fix it or if we need userspace
542 * assistance. 542 * assistance.
543 */ 543 */
544 static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 544 static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
545 { 545 {
546 u32 exit_reason = kvm_get_exit_reason(vcpu); 546 u32 exit_reason = kvm_get_exit_reason(vcpu);
547 vcpu->arch.last_exit = exit_reason; 547 vcpu->arch.last_exit = exit_reason;
548 548
549 if (exit_reason < kvm_vti_max_exit_handlers 549 if (exit_reason < kvm_vti_max_exit_handlers
550 && kvm_vti_exit_handlers[exit_reason]) 550 && kvm_vti_exit_handlers[exit_reason])
551 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); 551 return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
552 else { 552 else {
553 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 553 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
554 kvm_run->hw.hardware_exit_reason = exit_reason; 554 kvm_run->hw.hardware_exit_reason = exit_reason;
555 } 555 }
556 return 0; 556 return 0;
557 } 557 }
558 558
559 static inline void vti_set_rr6(unsigned long rr6) 559 static inline void vti_set_rr6(unsigned long rr6)
560 { 560 {
561 ia64_set_rr(RR6, rr6); 561 ia64_set_rr(RR6, rr6);
562 ia64_srlz_i(); 562 ia64_srlz_i();
563 } 563 }
564 564
565 static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) 565 static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
566 { 566 {
567 unsigned long pte; 567 unsigned long pte;
568 struct kvm *kvm = vcpu->kvm; 568 struct kvm *kvm = vcpu->kvm;
569 int r; 569 int r;
570 570
571 /*Insert a pair of tr to map vmm*/ 571 /*Insert a pair of tr to map vmm*/
572 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); 572 pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
573 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); 573 r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
574 if (r < 0) 574 if (r < 0)
575 goto out; 575 goto out;
576 vcpu->arch.vmm_tr_slot = r; 576 vcpu->arch.vmm_tr_slot = r;
577 /*Insert a pairt of tr to map data of vm*/ 577 /*Insert a pairt of tr to map data of vm*/
578 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); 578 pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
579 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, 579 r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
580 pte, KVM_VM_DATA_SHIFT); 580 pte, KVM_VM_DATA_SHIFT);
581 if (r < 0) 581 if (r < 0)
582 goto out; 582 goto out;
583 vcpu->arch.vm_tr_slot = r; 583 vcpu->arch.vm_tr_slot = r;
584 584
585 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) 585 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
586 if (kvm->arch.is_sn2) { 586 if (kvm->arch.is_sn2) {
587 r = kvm_sn2_setup_mappings(vcpu); 587 r = kvm_sn2_setup_mappings(vcpu);
588 if (r < 0) 588 if (r < 0)
589 goto out; 589 goto out;
590 } 590 }
591 #endif 591 #endif
592 592
593 r = 0; 593 r = 0;
594 out: 594 out:
595 return r; 595 return r;
596 } 596 }
597 597
598 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) 598 static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
599 { 599 {
600 struct kvm *kvm = vcpu->kvm; 600 struct kvm *kvm = vcpu->kvm;
601 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); 601 ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
602 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); 602 ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
603 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) 603 #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
604 if (kvm->arch.is_sn2) 604 if (kvm->arch.is_sn2)
605 ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); 605 ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
606 #endif 606 #endif
607 } 607 }
608 608
609 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) 609 static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
610 { 610 {
611 unsigned long psr; 611 unsigned long psr;
612 int r; 612 int r;
613 int cpu = smp_processor_id(); 613 int cpu = smp_processor_id();
614 614
615 if (vcpu->arch.last_run_cpu != cpu || 615 if (vcpu->arch.last_run_cpu != cpu ||
616 per_cpu(last_vcpu, cpu) != vcpu) { 616 per_cpu(last_vcpu, cpu) != vcpu) {
617 per_cpu(last_vcpu, cpu) = vcpu; 617 per_cpu(last_vcpu, cpu) = vcpu;
618 vcpu->arch.last_run_cpu = cpu; 618 vcpu->arch.last_run_cpu = cpu;
619 kvm_flush_tlb_all(); 619 kvm_flush_tlb_all();
620 } 620 }
621 621
622 vcpu->arch.host_rr6 = ia64_get_rr(RR6); 622 vcpu->arch.host_rr6 = ia64_get_rr(RR6);
623 vti_set_rr6(vcpu->arch.vmm_rr); 623 vti_set_rr6(vcpu->arch.vmm_rr);
624 local_irq_save(psr); 624 local_irq_save(psr);
625 r = kvm_insert_vmm_mapping(vcpu); 625 r = kvm_insert_vmm_mapping(vcpu);
626 local_irq_restore(psr); 626 local_irq_restore(psr);
627 return r; 627 return r;
628 } 628 }
629 629
630 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) 630 static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
631 { 631 {
632 kvm_purge_vmm_mapping(vcpu); 632 kvm_purge_vmm_mapping(vcpu);
633 vti_set_rr6(vcpu->arch.host_rr6); 633 vti_set_rr6(vcpu->arch.host_rr6);
634 } 634 }
635 635
636 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 636 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
637 { 637 {
638 union context *host_ctx, *guest_ctx; 638 union context *host_ctx, *guest_ctx;
639 int r, idx; 639 int r, idx;
640 640
641 idx = srcu_read_lock(&vcpu->kvm->srcu); 641 idx = srcu_read_lock(&vcpu->kvm->srcu);
642 642
643 again: 643 again:
644 if (signal_pending(current)) { 644 if (signal_pending(current)) {
645 r = -EINTR; 645 r = -EINTR;
646 kvm_run->exit_reason = KVM_EXIT_INTR; 646 kvm_run->exit_reason = KVM_EXIT_INTR;
647 goto out; 647 goto out;
648 } 648 }
649 649
650 preempt_disable(); 650 preempt_disable();
651 local_irq_disable(); 651 local_irq_disable();
652 652
653 /*Get host and guest context with guest address space.*/ 653 /*Get host and guest context with guest address space.*/
654 host_ctx = kvm_get_host_context(vcpu); 654 host_ctx = kvm_get_host_context(vcpu);
655 guest_ctx = kvm_get_guest_context(vcpu); 655 guest_ctx = kvm_get_guest_context(vcpu);
656 656
657 clear_bit(KVM_REQ_KICK, &vcpu->requests); 657 clear_bit(KVM_REQ_KICK, &vcpu->requests);
658 658
659 r = kvm_vcpu_pre_transition(vcpu); 659 r = kvm_vcpu_pre_transition(vcpu);
660 if (r < 0) 660 if (r < 0)
661 goto vcpu_run_fail; 661 goto vcpu_run_fail;
662 662
663 srcu_read_unlock(&vcpu->kvm->srcu, idx); 663 srcu_read_unlock(&vcpu->kvm->srcu, idx);
664 kvm_guest_enter(); 664 kvm_guest_enter();
665 665
666 /* 666 /*
667 * Transition to the guest 667 * Transition to the guest
668 */ 668 */
669 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); 669 kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
670 670
671 kvm_vcpu_post_transition(vcpu); 671 kvm_vcpu_post_transition(vcpu);
672 672
673 vcpu->arch.launched = 1; 673 vcpu->arch.launched = 1;
674 set_bit(KVM_REQ_KICK, &vcpu->requests); 674 set_bit(KVM_REQ_KICK, &vcpu->requests);
675 local_irq_enable(); 675 local_irq_enable();
676 676
677 /* 677 /*
678 * We must have an instruction between local_irq_enable() and 678 * We must have an instruction between local_irq_enable() and
679 * kvm_guest_exit(), so the timer interrupt isn't delayed by 679 * kvm_guest_exit(), so the timer interrupt isn't delayed by
680 * the interrupt shadow. The stat.exits increment will do nicely. 680 * the interrupt shadow. The stat.exits increment will do nicely.
681 * But we need to prevent reordering, hence this barrier(): 681 * But we need to prevent reordering, hence this barrier():
682 */ 682 */
683 barrier(); 683 barrier();
684 kvm_guest_exit(); 684 kvm_guest_exit();
685 preempt_enable(); 685 preempt_enable();
686 686
687 idx = srcu_read_lock(&vcpu->kvm->srcu); 687 idx = srcu_read_lock(&vcpu->kvm->srcu);
688 688
689 r = kvm_handle_exit(kvm_run, vcpu); 689 r = kvm_handle_exit(kvm_run, vcpu);
690 690
691 if (r > 0) { 691 if (r > 0) {
692 if (!need_resched()) 692 if (!need_resched())
693 goto again; 693 goto again;
694 } 694 }
695 695
696 out: 696 out:
697 srcu_read_unlock(&vcpu->kvm->srcu, idx); 697 srcu_read_unlock(&vcpu->kvm->srcu, idx);
698 if (r > 0) { 698 if (r > 0) {
699 kvm_resched(vcpu); 699 kvm_resched(vcpu);
700 idx = srcu_read_lock(&vcpu->kvm->srcu); 700 idx = srcu_read_lock(&vcpu->kvm->srcu);
701 goto again; 701 goto again;
702 } 702 }
703 703
704 return r; 704 return r;
705 705
706 vcpu_run_fail: 706 vcpu_run_fail:
707 local_irq_enable(); 707 local_irq_enable();
708 preempt_enable(); 708 preempt_enable();
709 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 709 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
710 goto out; 710 goto out;
711 } 711 }
712 712
713 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) 713 static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
714 { 714 {
715 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); 715 struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
716 716
717 if (!vcpu->mmio_is_write) 717 if (!vcpu->mmio_is_write)
718 memcpy(&p->data, vcpu->mmio_data, 8); 718 memcpy(&p->data, vcpu->mmio_data, 8);
719 p->state = STATE_IORESP_READY; 719 p->state = STATE_IORESP_READY;
720 } 720 }
721 721
722 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 722 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
723 { 723 {
724 int r; 724 int r;
725 sigset_t sigsaved; 725 sigset_t sigsaved;
726 726
727 vcpu_load(vcpu); 727 vcpu_load(vcpu);
728 728
729 if (vcpu->sigset_active) 729 if (vcpu->sigset_active)
730 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 730 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
731 731
732 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 732 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
733 kvm_vcpu_block(vcpu); 733 kvm_vcpu_block(vcpu);
734 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 734 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
735 r = -EAGAIN; 735 r = -EAGAIN;
736 goto out; 736 goto out;
737 } 737 }
738 738
739 if (vcpu->mmio_needed) { 739 if (vcpu->mmio_needed) {
740 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 740 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
741 kvm_set_mmio_data(vcpu); 741 kvm_set_mmio_data(vcpu);
742 vcpu->mmio_read_completed = 1; 742 vcpu->mmio_read_completed = 1;
743 vcpu->mmio_needed = 0; 743 vcpu->mmio_needed = 0;
744 } 744 }
745 r = __vcpu_run(vcpu, kvm_run); 745 r = __vcpu_run(vcpu, kvm_run);
746 out: 746 out:
747 if (vcpu->sigset_active) 747 if (vcpu->sigset_active)
748 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 748 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
749 749
750 vcpu_put(vcpu); 750 vcpu_put(vcpu);
751 return r; 751 return r;
752 } 752 }
753 753
754 static struct kvm *kvm_alloc_kvm(void) 754 static struct kvm *kvm_alloc_kvm(void)
755 { 755 {
756 756
757 struct kvm *kvm; 757 struct kvm *kvm;
758 uint64_t vm_base; 758 uint64_t vm_base;
759 759
760 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); 760 BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
761 761
762 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); 762 vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
763 763
764 if (!vm_base) 764 if (!vm_base)
765 return ERR_PTR(-ENOMEM); 765 return ERR_PTR(-ENOMEM);
766 766
767 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 767 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
768 kvm = (struct kvm *)(vm_base + 768 kvm = (struct kvm *)(vm_base +
769 offsetof(struct kvm_vm_data, kvm_vm_struct)); 769 offsetof(struct kvm_vm_data, kvm_vm_struct));
770 kvm->arch.vm_base = vm_base; 770 kvm->arch.vm_base = vm_base;
771 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); 771 printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
772 772
773 return kvm; 773 return kvm;
774 } 774 }
775 775
776 struct kvm_io_range { 776 struct kvm_io_range {
777 unsigned long start; 777 unsigned long start;
778 unsigned long size; 778 unsigned long size;
779 unsigned long type; 779 unsigned long type;
780 }; 780 };
781 781
782 static const struct kvm_io_range io_ranges[] = { 782 static const struct kvm_io_range io_ranges[] = {
783 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, 783 {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
784 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, 784 {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
785 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, 785 {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
786 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, 786 {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
787 {PIB_START, PIB_SIZE, GPFN_PIB}, 787 {PIB_START, PIB_SIZE, GPFN_PIB},
788 }; 788 };
789 789
790 static void kvm_build_io_pmt(struct kvm *kvm) 790 static void kvm_build_io_pmt(struct kvm *kvm)
791 { 791 {
792 unsigned long i, j; 792 unsigned long i, j;
793 793
794 /* Mark I/O ranges */ 794 /* Mark I/O ranges */
795 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); 795 for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
796 i++) { 796 i++) {
797 for (j = io_ranges[i].start; 797 for (j = io_ranges[i].start;
798 j < io_ranges[i].start + io_ranges[i].size; 798 j < io_ranges[i].start + io_ranges[i].size;
799 j += PAGE_SIZE) 799 j += PAGE_SIZE)
800 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, 800 kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
801 io_ranges[i].type, 0); 801 io_ranges[i].type, 0);
802 } 802 }
803 803
804 } 804 }
805 805
806 /*Use unused rids to virtualize guest rid.*/ 806 /*Use unused rids to virtualize guest rid.*/
807 #define GUEST_PHYSICAL_RR0 0x1739 807 #define GUEST_PHYSICAL_RR0 0x1739
808 #define GUEST_PHYSICAL_RR4 0x2739 808 #define GUEST_PHYSICAL_RR4 0x2739
809 #define VMM_INIT_RR 0x1660 809 #define VMM_INIT_RR 0x1660
810 810
811 static void kvm_init_vm(struct kvm *kvm) 811 static void kvm_init_vm(struct kvm *kvm)
812 { 812 {
813 BUG_ON(!kvm); 813 BUG_ON(!kvm);
814 814
815 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; 815 kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
816 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; 816 kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
817 kvm->arch.vmm_init_rr = VMM_INIT_RR; 817 kvm->arch.vmm_init_rr = VMM_INIT_RR;
818 818
819 /* 819 /*
820 *Fill P2M entries for MMIO/IO ranges 820 *Fill P2M entries for MMIO/IO ranges
821 */ 821 */
822 kvm_build_io_pmt(kvm); 822 kvm_build_io_pmt(kvm);
823 823
824 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 824 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
825 825
826 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 826 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
827 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 827 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
828 } 828 }
829 829
830 struct kvm *kvm_arch_create_vm(void) 830 struct kvm *kvm_arch_create_vm(void)
831 { 831 {
832 struct kvm *kvm = kvm_alloc_kvm(); 832 struct kvm *kvm = kvm_alloc_kvm();
833 833
834 if (IS_ERR(kvm)) 834 if (IS_ERR(kvm))
835 return ERR_PTR(-ENOMEM); 835 return ERR_PTR(-ENOMEM);
836 836
837 kvm->arch.is_sn2 = ia64_platform_is("sn2"); 837 kvm->arch.is_sn2 = ia64_platform_is("sn2");
838 838
839 kvm_init_vm(kvm); 839 kvm_init_vm(kvm);
840 840
841 return kvm; 841 return kvm;
842 842
843 } 843 }
844 844
845 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, 845 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
846 struct kvm_irqchip *chip) 846 struct kvm_irqchip *chip)
847 { 847 {
848 int r; 848 int r;
849 849
850 r = 0; 850 r = 0;
851 switch (chip->chip_id) { 851 switch (chip->chip_id) {
852 case KVM_IRQCHIP_IOAPIC: 852 case KVM_IRQCHIP_IOAPIC:
853 r = kvm_get_ioapic(kvm, &chip->chip.ioapic); 853 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
854 break; 854 break;
855 default: 855 default:
856 r = -EINVAL; 856 r = -EINVAL;
857 break; 857 break;
858 } 858 }
859 return r; 859 return r;
860 } 860 }
861 861
862 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 862 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
863 { 863 {
864 int r; 864 int r;
865 865
866 r = 0; 866 r = 0;
867 switch (chip->chip_id) { 867 switch (chip->chip_id) {
868 case KVM_IRQCHIP_IOAPIC: 868 case KVM_IRQCHIP_IOAPIC:
869 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 869 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
870 break; 870 break;
871 default: 871 default:
872 r = -EINVAL; 872 r = -EINVAL;
873 break; 873 break;
874 } 874 }
875 return r; 875 return r;
876 } 876 }
877 877
878 #define RESTORE_REGS(_x) vcpu->arch._x = regs->_x 878 #define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
879 879
880 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 880 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
881 { 881 {
882 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 882 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
883 int i; 883 int i;
884 884
885 vcpu_load(vcpu); 885 vcpu_load(vcpu);
886 886
887 for (i = 0; i < 16; i++) { 887 for (i = 0; i < 16; i++) {
888 vpd->vgr[i] = regs->vpd.vgr[i]; 888 vpd->vgr[i] = regs->vpd.vgr[i];
889 vpd->vbgr[i] = regs->vpd.vbgr[i]; 889 vpd->vbgr[i] = regs->vpd.vbgr[i];
890 } 890 }
891 for (i = 0; i < 128; i++) 891 for (i = 0; i < 128; i++)
892 vpd->vcr[i] = regs->vpd.vcr[i]; 892 vpd->vcr[i] = regs->vpd.vcr[i];
893 vpd->vhpi = regs->vpd.vhpi; 893 vpd->vhpi = regs->vpd.vhpi;
894 vpd->vnat = regs->vpd.vnat; 894 vpd->vnat = regs->vpd.vnat;
895 vpd->vbnat = regs->vpd.vbnat; 895 vpd->vbnat = regs->vpd.vbnat;
896 vpd->vpsr = regs->vpd.vpsr; 896 vpd->vpsr = regs->vpd.vpsr;
897 897
898 vpd->vpr = regs->vpd.vpr; 898 vpd->vpr = regs->vpd.vpr;
899 899
900 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context)); 900 memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
901 901
902 RESTORE_REGS(mp_state); 902 RESTORE_REGS(mp_state);
903 RESTORE_REGS(vmm_rr); 903 RESTORE_REGS(vmm_rr);
904 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); 904 memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
905 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); 905 memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
906 RESTORE_REGS(itr_regions); 906 RESTORE_REGS(itr_regions);
907 RESTORE_REGS(dtr_regions); 907 RESTORE_REGS(dtr_regions);
908 RESTORE_REGS(tc_regions); 908 RESTORE_REGS(tc_regions);
909 RESTORE_REGS(irq_check); 909 RESTORE_REGS(irq_check);
910 RESTORE_REGS(itc_check); 910 RESTORE_REGS(itc_check);
911 RESTORE_REGS(timer_check); 911 RESTORE_REGS(timer_check);
912 RESTORE_REGS(timer_pending); 912 RESTORE_REGS(timer_pending);
913 RESTORE_REGS(last_itc); 913 RESTORE_REGS(last_itc);
914 for (i = 0; i < 8; i++) { 914 for (i = 0; i < 8; i++) {
915 vcpu->arch.vrr[i] = regs->vrr[i]; 915 vcpu->arch.vrr[i] = regs->vrr[i];
916 vcpu->arch.ibr[i] = regs->ibr[i]; 916 vcpu->arch.ibr[i] = regs->ibr[i];
917 vcpu->arch.dbr[i] = regs->dbr[i]; 917 vcpu->arch.dbr[i] = regs->dbr[i];
918 } 918 }
919 for (i = 0; i < 4; i++) 919 for (i = 0; i < 4; i++)
920 vcpu->arch.insvc[i] = regs->insvc[i]; 920 vcpu->arch.insvc[i] = regs->insvc[i];
921 RESTORE_REGS(xtp); 921 RESTORE_REGS(xtp);
922 RESTORE_REGS(metaphysical_rr0); 922 RESTORE_REGS(metaphysical_rr0);
923 RESTORE_REGS(metaphysical_rr4); 923 RESTORE_REGS(metaphysical_rr4);
924 RESTORE_REGS(metaphysical_saved_rr0); 924 RESTORE_REGS(metaphysical_saved_rr0);
925 RESTORE_REGS(metaphysical_saved_rr4); 925 RESTORE_REGS(metaphysical_saved_rr4);
926 RESTORE_REGS(fp_psr); 926 RESTORE_REGS(fp_psr);
927 RESTORE_REGS(saved_gp); 927 RESTORE_REGS(saved_gp);
928 928
929 vcpu->arch.irq_new_pending = 1; 929 vcpu->arch.irq_new_pending = 1;
930 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); 930 vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
931 set_bit(KVM_REQ_RESUME, &vcpu->requests); 931 set_bit(KVM_REQ_RESUME, &vcpu->requests);
932 932
933 vcpu_put(vcpu); 933 vcpu_put(vcpu);
934 934
935 return 0; 935 return 0;
936 } 936 }
937 937
938 long kvm_arch_vm_ioctl(struct file *filp, 938 long kvm_arch_vm_ioctl(struct file *filp,
939 unsigned int ioctl, unsigned long arg) 939 unsigned int ioctl, unsigned long arg)
940 { 940 {
941 struct kvm *kvm = filp->private_data; 941 struct kvm *kvm = filp->private_data;
942 void __user *argp = (void __user *)arg; 942 void __user *argp = (void __user *)arg;
943 int r = -ENOTTY; 943 int r = -ENOTTY;
944 944
945 switch (ioctl) { 945 switch (ioctl) {
946 case KVM_SET_MEMORY_REGION: { 946 case KVM_SET_MEMORY_REGION: {
947 struct kvm_memory_region kvm_mem; 947 struct kvm_memory_region kvm_mem;
948 struct kvm_userspace_memory_region kvm_userspace_mem; 948 struct kvm_userspace_memory_region kvm_userspace_mem;
949 949
950 r = -EFAULT; 950 r = -EFAULT;
951 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) 951 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
952 goto out; 952 goto out;
953 kvm_userspace_mem.slot = kvm_mem.slot; 953 kvm_userspace_mem.slot = kvm_mem.slot;
954 kvm_userspace_mem.flags = kvm_mem.flags; 954 kvm_userspace_mem.flags = kvm_mem.flags;
955 kvm_userspace_mem.guest_phys_addr = 955 kvm_userspace_mem.guest_phys_addr =
956 kvm_mem.guest_phys_addr; 956 kvm_mem.guest_phys_addr;
957 kvm_userspace_mem.memory_size = kvm_mem.memory_size; 957 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
958 r = kvm_vm_ioctl_set_memory_region(kvm, 958 r = kvm_vm_ioctl_set_memory_region(kvm,
959 &kvm_userspace_mem, 0); 959 &kvm_userspace_mem, 0);
960 if (r) 960 if (r)
961 goto out; 961 goto out;
962 break; 962 break;
963 } 963 }
964 case KVM_CREATE_IRQCHIP: 964 case KVM_CREATE_IRQCHIP:
965 r = -EFAULT; 965 r = -EFAULT;
966 r = kvm_ioapic_init(kvm); 966 r = kvm_ioapic_init(kvm);
967 if (r) 967 if (r)
968 goto out; 968 goto out;
969 r = kvm_setup_default_irq_routing(kvm); 969 r = kvm_setup_default_irq_routing(kvm);
970 if (r) { 970 if (r) {
971 kvm_ioapic_destroy(kvm); 971 kvm_ioapic_destroy(kvm);
972 goto out; 972 goto out;
973 } 973 }
974 break; 974 break;
975 case KVM_IRQ_LINE_STATUS: 975 case KVM_IRQ_LINE_STATUS:
976 case KVM_IRQ_LINE: { 976 case KVM_IRQ_LINE: {
977 struct kvm_irq_level irq_event; 977 struct kvm_irq_level irq_event;
978 978
979 r = -EFAULT; 979 r = -EFAULT;
980 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 980 if (copy_from_user(&irq_event, argp, sizeof irq_event))
981 goto out; 981 goto out;
982 if (irqchip_in_kernel(kvm)) { 982 if (irqchip_in_kernel(kvm)) {
983 __s32 status; 983 __s32 status;
984 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 984 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
985 irq_event.irq, irq_event.level); 985 irq_event.irq, irq_event.level);
986 if (ioctl == KVM_IRQ_LINE_STATUS) { 986 if (ioctl == KVM_IRQ_LINE_STATUS) {
987 irq_event.status = status; 987 irq_event.status = status;
988 if (copy_to_user(argp, &irq_event, 988 if (copy_to_user(argp, &irq_event,
989 sizeof irq_event)) 989 sizeof irq_event))
990 goto out; 990 goto out;
991 } 991 }
992 r = 0; 992 r = 0;
993 } 993 }
994 break; 994 break;
995 } 995 }
996 case KVM_GET_IRQCHIP: { 996 case KVM_GET_IRQCHIP: {
997 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 997 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
998 struct kvm_irqchip chip; 998 struct kvm_irqchip chip;
999 999
1000 r = -EFAULT; 1000 r = -EFAULT;
1001 if (copy_from_user(&chip, argp, sizeof chip)) 1001 if (copy_from_user(&chip, argp, sizeof chip))
1002 goto out; 1002 goto out;
1003 r = -ENXIO; 1003 r = -ENXIO;
1004 if (!irqchip_in_kernel(kvm)) 1004 if (!irqchip_in_kernel(kvm))
1005 goto out; 1005 goto out;
1006 r = kvm_vm_ioctl_get_irqchip(kvm, &chip); 1006 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
1007 if (r) 1007 if (r)
1008 goto out; 1008 goto out;
1009 r = -EFAULT; 1009 r = -EFAULT;
1010 if (copy_to_user(argp, &chip, sizeof chip)) 1010 if (copy_to_user(argp, &chip, sizeof chip))
1011 goto out; 1011 goto out;
1012 r = 0; 1012 r = 0;
1013 break; 1013 break;
1014 } 1014 }
1015 case KVM_SET_IRQCHIP: { 1015 case KVM_SET_IRQCHIP: {
1016 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 1016 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
1017 struct kvm_irqchip chip; 1017 struct kvm_irqchip chip;
1018 1018
1019 r = -EFAULT; 1019 r = -EFAULT;
1020 if (copy_from_user(&chip, argp, sizeof chip)) 1020 if (copy_from_user(&chip, argp, sizeof chip))
1021 goto out; 1021 goto out;
1022 r = -ENXIO; 1022 r = -ENXIO;
1023 if (!irqchip_in_kernel(kvm)) 1023 if (!irqchip_in_kernel(kvm))
1024 goto out; 1024 goto out;
1025 r = kvm_vm_ioctl_set_irqchip(kvm, &chip); 1025 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
1026 if (r) 1026 if (r)
1027 goto out; 1027 goto out;
1028 r = 0; 1028 r = 0;
1029 break; 1029 break;
1030 } 1030 }
1031 default: 1031 default:
1032 ; 1032 ;
1033 } 1033 }
1034 out: 1034 out:
1035 return r; 1035 return r;
1036 } 1036 }
1037 1037
1038 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 1038 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1039 struct kvm_sregs *sregs) 1039 struct kvm_sregs *sregs)
1040 { 1040 {
1041 return -EINVAL; 1041 return -EINVAL;
1042 } 1042 }
1043 1043
1044 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 1044 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1045 struct kvm_sregs *sregs) 1045 struct kvm_sregs *sregs)
1046 { 1046 {
1047 return -EINVAL; 1047 return -EINVAL;
1048 1048
1049 } 1049 }
1050 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 1050 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1051 struct kvm_translation *tr) 1051 struct kvm_translation *tr)
1052 { 1052 {
1053 1053
1054 return -EINVAL; 1054 return -EINVAL;
1055 } 1055 }
1056 1056
1057 static int kvm_alloc_vmm_area(void) 1057 static int kvm_alloc_vmm_area(void)
1058 { 1058 {
1059 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { 1059 if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
1060 kvm_vmm_base = __get_free_pages(GFP_KERNEL, 1060 kvm_vmm_base = __get_free_pages(GFP_KERNEL,
1061 get_order(KVM_VMM_SIZE)); 1061 get_order(KVM_VMM_SIZE));
1062 if (!kvm_vmm_base) 1062 if (!kvm_vmm_base)
1063 return -ENOMEM; 1063 return -ENOMEM;
1064 1064
1065 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); 1065 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1066 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; 1066 kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
1067 1067
1068 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", 1068 printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
1069 kvm_vmm_base, kvm_vm_buffer); 1069 kvm_vmm_base, kvm_vm_buffer);
1070 } 1070 }
1071 1071
1072 return 0; 1072 return 0;
1073 } 1073 }
1074 1074
1075 static void kvm_free_vmm_area(void) 1075 static void kvm_free_vmm_area(void)
1076 { 1076 {
1077 if (kvm_vmm_base) { 1077 if (kvm_vmm_base) {
1078 /*Zero this area before free to avoid bits leak!!*/ 1078 /*Zero this area before free to avoid bits leak!!*/
1079 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); 1079 memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
1080 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); 1080 free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
1081 kvm_vmm_base = 0; 1081 kvm_vmm_base = 0;
1082 kvm_vm_buffer = 0; 1082 kvm_vm_buffer = 0;
1083 kvm_vsa_base = 0; 1083 kvm_vsa_base = 0;
1084 } 1084 }
1085 } 1085 }
1086 1086
1087 static int vti_init_vpd(struct kvm_vcpu *vcpu) 1087 static int vti_init_vpd(struct kvm_vcpu *vcpu)
1088 { 1088 {
1089 int i; 1089 int i;
1090 union cpuid3_t cpuid3; 1090 union cpuid3_t cpuid3;
1091 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1091 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1092 1092
1093 if (IS_ERR(vpd)) 1093 if (IS_ERR(vpd))
1094 return PTR_ERR(vpd); 1094 return PTR_ERR(vpd);
1095 1095
1096 /* CPUID init */ 1096 /* CPUID init */
1097 for (i = 0; i < 5; i++) 1097 for (i = 0; i < 5; i++)
1098 vpd->vcpuid[i] = ia64_get_cpuid(i); 1098 vpd->vcpuid[i] = ia64_get_cpuid(i);
1099 1099
1100 /* Limit the CPUID number to 5 */ 1100 /* Limit the CPUID number to 5 */
1101 cpuid3.value = vpd->vcpuid[3]; 1101 cpuid3.value = vpd->vcpuid[3];
1102 cpuid3.number = 4; /* 5 - 1 */ 1102 cpuid3.number = 4; /* 5 - 1 */
1103 vpd->vcpuid[3] = cpuid3.value; 1103 vpd->vcpuid[3] = cpuid3.value;
1104 1104
1105 /*Set vac and vdc fields*/ 1105 /*Set vac and vdc fields*/
1106 vpd->vac.a_from_int_cr = 1; 1106 vpd->vac.a_from_int_cr = 1;
1107 vpd->vac.a_to_int_cr = 1; 1107 vpd->vac.a_to_int_cr = 1;
1108 vpd->vac.a_from_psr = 1; 1108 vpd->vac.a_from_psr = 1;
1109 vpd->vac.a_from_cpuid = 1; 1109 vpd->vac.a_from_cpuid = 1;
1110 vpd->vac.a_cover = 1; 1110 vpd->vac.a_cover = 1;
1111 vpd->vac.a_bsw = 1; 1111 vpd->vac.a_bsw = 1;
1112 vpd->vac.a_int = 1; 1112 vpd->vac.a_int = 1;
1113 vpd->vdc.d_vmsw = 1; 1113 vpd->vdc.d_vmsw = 1;
1114 1114
1115 /*Set virtual buffer*/ 1115 /*Set virtual buffer*/
1116 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; 1116 vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
1117 1117
1118 return 0; 1118 return 0;
1119 } 1119 }
1120 1120
1121 static int vti_create_vp(struct kvm_vcpu *vcpu) 1121 static int vti_create_vp(struct kvm_vcpu *vcpu)
1122 { 1122 {
1123 long ret; 1123 long ret;
1124 struct vpd *vpd = vcpu->arch.vpd; 1124 struct vpd *vpd = vcpu->arch.vpd;
1125 unsigned long vmm_ivt; 1125 unsigned long vmm_ivt;
1126 1126
1127 vmm_ivt = kvm_vmm_info->vmm_ivt; 1127 vmm_ivt = kvm_vmm_info->vmm_ivt;
1128 1128
1129 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); 1129 printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
1130 1130
1131 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); 1131 ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
1132 1132
1133 if (ret) { 1133 if (ret) {
1134 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); 1134 printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
1135 return -EINVAL; 1135 return -EINVAL;
1136 } 1136 }
1137 return 0; 1137 return 0;
1138 } 1138 }
1139 1139
1140 static void init_ptce_info(struct kvm_vcpu *vcpu) 1140 static void init_ptce_info(struct kvm_vcpu *vcpu)
1141 { 1141 {
1142 ia64_ptce_info_t ptce = {0}; 1142 ia64_ptce_info_t ptce = {0};
1143 1143
1144 ia64_get_ptce(&ptce); 1144 ia64_get_ptce(&ptce);
1145 vcpu->arch.ptce_base = ptce.base; 1145 vcpu->arch.ptce_base = ptce.base;
1146 vcpu->arch.ptce_count[0] = ptce.count[0]; 1146 vcpu->arch.ptce_count[0] = ptce.count[0];
1147 vcpu->arch.ptce_count[1] = ptce.count[1]; 1147 vcpu->arch.ptce_count[1] = ptce.count[1];
1148 vcpu->arch.ptce_stride[0] = ptce.stride[0]; 1148 vcpu->arch.ptce_stride[0] = ptce.stride[0];
1149 vcpu->arch.ptce_stride[1] = ptce.stride[1]; 1149 vcpu->arch.ptce_stride[1] = ptce.stride[1];
1150 } 1150 }
1151 1151
1152 static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) 1152 static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
1153 { 1153 {
1154 struct hrtimer *p_ht = &vcpu->arch.hlt_timer; 1154 struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
1155 1155
1156 if (hrtimer_cancel(p_ht)) 1156 if (hrtimer_cancel(p_ht))
1157 hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS); 1157 hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
1158 } 1158 }
1159 1159
1160 static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) 1160 static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
1161 { 1161 {
1162 struct kvm_vcpu *vcpu; 1162 struct kvm_vcpu *vcpu;
1163 wait_queue_head_t *q; 1163 wait_queue_head_t *q;
1164 1164
1165 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); 1165 vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
1166 q = &vcpu->wq; 1166 q = &vcpu->wq;
1167 1167
1168 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) 1168 if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
1169 goto out; 1169 goto out;
1170 1170
1171 if (waitqueue_active(q)) 1171 if (waitqueue_active(q))
1172 wake_up_interruptible(q); 1172 wake_up_interruptible(q);
1173 1173
1174 out: 1174 out:
1175 vcpu->arch.timer_fired = 1; 1175 vcpu->arch.timer_fired = 1;
1176 vcpu->arch.timer_check = 1; 1176 vcpu->arch.timer_check = 1;
1177 return HRTIMER_NORESTART; 1177 return HRTIMER_NORESTART;
1178 } 1178 }
1179 1179
1180 #define PALE_RESET_ENTRY 0x80000000ffffffb0UL 1180 #define PALE_RESET_ENTRY 0x80000000ffffffb0UL
1181 1181
1182 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 1182 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1183 { 1183 {
1184 struct kvm_vcpu *v; 1184 struct kvm_vcpu *v;
1185 int r; 1185 int r;
1186 int i; 1186 int i;
1187 long itc_offset; 1187 long itc_offset;
1188 struct kvm *kvm = vcpu->kvm; 1188 struct kvm *kvm = vcpu->kvm;
1189 struct kvm_pt_regs *regs = vcpu_regs(vcpu); 1189 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
1190 1190
1191 union context *p_ctx = &vcpu->arch.guest; 1191 union context *p_ctx = &vcpu->arch.guest;
1192 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); 1192 struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
1193 1193
1194 /*Init vcpu context for first run.*/ 1194 /*Init vcpu context for first run.*/
1195 if (IS_ERR(vmm_vcpu)) 1195 if (IS_ERR(vmm_vcpu))
1196 return PTR_ERR(vmm_vcpu); 1196 return PTR_ERR(vmm_vcpu);
1197 1197
1198 if (kvm_vcpu_is_bsp(vcpu)) { 1198 if (kvm_vcpu_is_bsp(vcpu)) {
1199 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 1199 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
1200 1200
1201 /*Set entry address for first run.*/ 1201 /*Set entry address for first run.*/
1202 regs->cr_iip = PALE_RESET_ENTRY; 1202 regs->cr_iip = PALE_RESET_ENTRY;
1203 1203
1204 /*Initialize itc offset for vcpus*/ 1204 /*Initialize itc offset for vcpus*/
1205 itc_offset = 0UL - kvm_get_itc(vcpu); 1205 itc_offset = 0UL - kvm_get_itc(vcpu);
1206 for (i = 0; i < KVM_MAX_VCPUS; i++) { 1206 for (i = 0; i < KVM_MAX_VCPUS; i++) {
1207 v = (struct kvm_vcpu *)((char *)vcpu + 1207 v = (struct kvm_vcpu *)((char *)vcpu +
1208 sizeof(struct kvm_vcpu_data) * i); 1208 sizeof(struct kvm_vcpu_data) * i);
1209 v->arch.itc_offset = itc_offset; 1209 v->arch.itc_offset = itc_offset;
1210 v->arch.last_itc = 0; 1210 v->arch.last_itc = 0;
1211 } 1211 }
1212 } else 1212 } else
1213 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; 1213 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
1214 1214
1215 r = -ENOMEM; 1215 r = -ENOMEM;
1216 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); 1216 vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
1217 if (!vcpu->arch.apic) 1217 if (!vcpu->arch.apic)
1218 goto out; 1218 goto out;
1219 vcpu->arch.apic->vcpu = vcpu; 1219 vcpu->arch.apic->vcpu = vcpu;
1220 1220
1221 p_ctx->gr[1] = 0; 1221 p_ctx->gr[1] = 0;
1222 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); 1222 p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
1223 p_ctx->gr[13] = (unsigned long)vmm_vcpu; 1223 p_ctx->gr[13] = (unsigned long)vmm_vcpu;
1224 p_ctx->psr = 0x1008522000UL; 1224 p_ctx->psr = 0x1008522000UL;
1225 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ 1225 p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
1226 p_ctx->caller_unat = 0; 1226 p_ctx->caller_unat = 0;
1227 p_ctx->pr = 0x0; 1227 p_ctx->pr = 0x0;
1228 p_ctx->ar[36] = 0x0; /*unat*/ 1228 p_ctx->ar[36] = 0x0; /*unat*/
1229 p_ctx->ar[19] = 0x0; /*rnat*/ 1229 p_ctx->ar[19] = 0x0; /*rnat*/
1230 p_ctx->ar[18] = (unsigned long)vmm_vcpu + 1230 p_ctx->ar[18] = (unsigned long)vmm_vcpu +
1231 ((sizeof(struct kvm_vcpu)+15) & ~15); 1231 ((sizeof(struct kvm_vcpu)+15) & ~15);
1232 p_ctx->ar[64] = 0x0; /*pfs*/ 1232 p_ctx->ar[64] = 0x0; /*pfs*/
1233 p_ctx->cr[0] = 0x7e04UL; 1233 p_ctx->cr[0] = 0x7e04UL;
1234 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; 1234 p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
1235 p_ctx->cr[8] = 0x3c; 1235 p_ctx->cr[8] = 0x3c;
1236 1236
1237 /*Initialize region register*/ 1237 /*Initialize region register*/
1238 p_ctx->rr[0] = 0x30; 1238 p_ctx->rr[0] = 0x30;
1239 p_ctx->rr[1] = 0x30; 1239 p_ctx->rr[1] = 0x30;
1240 p_ctx->rr[2] = 0x30; 1240 p_ctx->rr[2] = 0x30;
1241 p_ctx->rr[3] = 0x30; 1241 p_ctx->rr[3] = 0x30;
1242 p_ctx->rr[4] = 0x30; 1242 p_ctx->rr[4] = 0x30;
1243 p_ctx->rr[5] = 0x30; 1243 p_ctx->rr[5] = 0x30;
1244 p_ctx->rr[7] = 0x30; 1244 p_ctx->rr[7] = 0x30;
1245 1245
1246 /*Initialize branch register 0*/ 1246 /*Initialize branch register 0*/
1247 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; 1247 p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
1248 1248
1249 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; 1249 vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
1250 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; 1250 vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
1251 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; 1251 vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
1252 1252
1253 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1253 hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1254 vcpu->arch.hlt_timer.function = hlt_timer_fn; 1254 vcpu->arch.hlt_timer.function = hlt_timer_fn;
1255 1255
1256 vcpu->arch.last_run_cpu = -1; 1256 vcpu->arch.last_run_cpu = -1;
1257 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); 1257 vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
1258 vcpu->arch.vsa_base = kvm_vsa_base; 1258 vcpu->arch.vsa_base = kvm_vsa_base;
1259 vcpu->arch.__gp = kvm_vmm_gp; 1259 vcpu->arch.__gp = kvm_vmm_gp;
1260 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); 1260 vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
1261 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); 1261 vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
1262 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); 1262 vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
1263 init_ptce_info(vcpu); 1263 init_ptce_info(vcpu);
1264 1264
1265 r = 0; 1265 r = 0;
1266 out: 1266 out:
1267 return r; 1267 return r;
1268 } 1268 }
1269 1269
1270 static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) 1270 static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
1271 { 1271 {
1272 unsigned long psr; 1272 unsigned long psr;
1273 int r; 1273 int r;
1274 1274
1275 local_irq_save(psr); 1275 local_irq_save(psr);
1276 r = kvm_insert_vmm_mapping(vcpu); 1276 r = kvm_insert_vmm_mapping(vcpu);
1277 local_irq_restore(psr); 1277 local_irq_restore(psr);
1278 if (r) 1278 if (r)
1279 goto fail; 1279 goto fail;
1280 r = kvm_vcpu_init(vcpu, vcpu->kvm, id); 1280 r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
1281 if (r) 1281 if (r)
1282 goto fail; 1282 goto fail;
1283 1283
1284 r = vti_init_vpd(vcpu); 1284 r = vti_init_vpd(vcpu);
1285 if (r) { 1285 if (r) {
1286 printk(KERN_DEBUG"kvm: vpd init error!!\n"); 1286 printk(KERN_DEBUG"kvm: vpd init error!!\n");
1287 goto uninit; 1287 goto uninit;
1288 } 1288 }
1289 1289
1290 r = vti_create_vp(vcpu); 1290 r = vti_create_vp(vcpu);
1291 if (r) 1291 if (r)
1292 goto uninit; 1292 goto uninit;
1293 1293
1294 kvm_purge_vmm_mapping(vcpu); 1294 kvm_purge_vmm_mapping(vcpu);
1295 1295
1296 return 0; 1296 return 0;
1297 uninit: 1297 uninit:
1298 kvm_vcpu_uninit(vcpu); 1298 kvm_vcpu_uninit(vcpu);
1299 fail: 1299 fail:
1300 return r; 1300 return r;
1301 } 1301 }
1302 1302
1303 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 1303 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1304 unsigned int id) 1304 unsigned int id)
1305 { 1305 {
1306 struct kvm_vcpu *vcpu; 1306 struct kvm_vcpu *vcpu;
1307 unsigned long vm_base = kvm->arch.vm_base; 1307 unsigned long vm_base = kvm->arch.vm_base;
1308 int r; 1308 int r;
1309 int cpu; 1309 int cpu;
1310 1310
1311 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); 1311 BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
1312 1312
1313 r = -EINVAL; 1313 r = -EINVAL;
1314 if (id >= KVM_MAX_VCPUS) { 1314 if (id >= KVM_MAX_VCPUS) {
1315 printk(KERN_ERR"kvm: Can't configure vcpus > %ld", 1315 printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
1316 KVM_MAX_VCPUS); 1316 KVM_MAX_VCPUS);
1317 goto fail; 1317 goto fail;
1318 } 1318 }
1319 1319
1320 r = -ENOMEM; 1320 r = -ENOMEM;
1321 if (!vm_base) { 1321 if (!vm_base) {
1322 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); 1322 printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
1323 goto fail; 1323 goto fail;
1324 } 1324 }
1325 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, 1325 vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
1326 vcpu_data[id].vcpu_struct)); 1326 vcpu_data[id].vcpu_struct));
1327 vcpu->kvm = kvm; 1327 vcpu->kvm = kvm;
1328 1328
1329 cpu = get_cpu(); 1329 cpu = get_cpu();
1330 r = vti_vcpu_setup(vcpu, id); 1330 r = vti_vcpu_setup(vcpu, id);
1331 put_cpu(); 1331 put_cpu();
1332 1332
1333 if (r) { 1333 if (r) {
1334 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); 1334 printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
1335 goto fail; 1335 goto fail;
1336 } 1336 }
1337 1337
1338 return vcpu; 1338 return vcpu;
1339 fail: 1339 fail:
1340 return ERR_PTR(r); 1340 return ERR_PTR(r);
1341 } 1341 }
1342 1342
1343 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 1343 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1344 { 1344 {
1345 return 0; 1345 return 0;
1346 } 1346 }
1347 1347
1348 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1348 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1349 { 1349 {
1350 return -EINVAL; 1350 return -EINVAL;
1351 } 1351 }
1352 1352
1353 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 1353 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1354 { 1354 {
1355 return -EINVAL; 1355 return -EINVAL;
1356 } 1356 }
1357 1357
1358 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 1358 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1359 struct kvm_guest_debug *dbg) 1359 struct kvm_guest_debug *dbg)
1360 { 1360 {
1361 return -EINVAL; 1361 return -EINVAL;
1362 } 1362 }
1363 1363
1364 static void free_kvm(struct kvm *kvm) 1364 static void free_kvm(struct kvm *kvm)
1365 { 1365 {
1366 unsigned long vm_base = kvm->arch.vm_base; 1366 unsigned long vm_base = kvm->arch.vm_base;
1367 1367
1368 if (vm_base) { 1368 if (vm_base) {
1369 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); 1369 memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
1370 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); 1370 free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
1371 } 1371 }
1372 1372
1373 } 1373 }
1374 1374
1375 static void kvm_release_vm_pages(struct kvm *kvm) 1375 static void kvm_release_vm_pages(struct kvm *kvm)
1376 { 1376 {
1377 struct kvm_memslots *slots; 1377 struct kvm_memslots *slots;
1378 struct kvm_memory_slot *memslot; 1378 struct kvm_memory_slot *memslot;
1379 int i, j; 1379 int i, j;
1380 unsigned long base_gfn; 1380 unsigned long base_gfn;
1381 1381
1382 slots = rcu_dereference(kvm->memslots); 1382 slots = rcu_dereference(kvm->memslots);
1383 for (i = 0; i < slots->nmemslots; i++) { 1383 for (i = 0; i < slots->nmemslots; i++) {
1384 memslot = &slots->memslots[i]; 1384 memslot = &slots->memslots[i];
1385 base_gfn = memslot->base_gfn; 1385 base_gfn = memslot->base_gfn;
1386 1386
1387 for (j = 0; j < memslot->npages; j++) { 1387 for (j = 0; j < memslot->npages; j++) {
1388 if (memslot->rmap[j]) 1388 if (memslot->rmap[j])
1389 put_page((struct page *)memslot->rmap[j]); 1389 put_page((struct page *)memslot->rmap[j]);
1390 } 1390 }
1391 } 1391 }
1392 } 1392 }
1393 1393
1394 void kvm_arch_sync_events(struct kvm *kvm) 1394 void kvm_arch_sync_events(struct kvm *kvm)
1395 { 1395 {
1396 } 1396 }
1397 1397
1398 void kvm_arch_destroy_vm(struct kvm *kvm) 1398 void kvm_arch_destroy_vm(struct kvm *kvm)
1399 { 1399 {
1400 kvm_iommu_unmap_guest(kvm); 1400 kvm_iommu_unmap_guest(kvm);
1401 #ifdef KVM_CAP_DEVICE_ASSIGNMENT 1401 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
1402 kvm_free_all_assigned_devices(kvm); 1402 kvm_free_all_assigned_devices(kvm);
1403 #endif 1403 #endif
1404 kfree(kvm->arch.vioapic); 1404 kfree(kvm->arch.vioapic);
1405 kvm_release_vm_pages(kvm); 1405 kvm_release_vm_pages(kvm);
1406 kvm_free_physmem(kvm); 1406 kvm_free_physmem(kvm);
1407 cleanup_srcu_struct(&kvm->srcu); 1407 cleanup_srcu_struct(&kvm->srcu);
1408 free_kvm(kvm); 1408 free_kvm(kvm);
1409 } 1409 }
1410 1410
1411 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1411 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1412 { 1412 {
1413 } 1413 }
1414 1414
1415 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1415 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1416 { 1416 {
1417 if (cpu != vcpu->cpu) { 1417 if (cpu != vcpu->cpu) {
1418 vcpu->cpu = cpu; 1418 vcpu->cpu = cpu;
1419 if (vcpu->arch.ht_active) 1419 if (vcpu->arch.ht_active)
1420 kvm_migrate_hlt_timer(vcpu); 1420 kvm_migrate_hlt_timer(vcpu);
1421 } 1421 }
1422 } 1422 }
1423 1423
1424 #define SAVE_REGS(_x) regs->_x = vcpu->arch._x 1424 #define SAVE_REGS(_x) regs->_x = vcpu->arch._x
1425 1425
1426 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 1426 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1427 { 1427 {
1428 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1428 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1429 int i; 1429 int i;
1430 1430
1431 vcpu_load(vcpu); 1431 vcpu_load(vcpu);
1432 1432
1433 for (i = 0; i < 16; i++) { 1433 for (i = 0; i < 16; i++) {
1434 regs->vpd.vgr[i] = vpd->vgr[i]; 1434 regs->vpd.vgr[i] = vpd->vgr[i];
1435 regs->vpd.vbgr[i] = vpd->vbgr[i]; 1435 regs->vpd.vbgr[i] = vpd->vbgr[i];
1436 } 1436 }
1437 for (i = 0; i < 128; i++) 1437 for (i = 0; i < 128; i++)
1438 regs->vpd.vcr[i] = vpd->vcr[i]; 1438 regs->vpd.vcr[i] = vpd->vcr[i];
1439 regs->vpd.vhpi = vpd->vhpi; 1439 regs->vpd.vhpi = vpd->vhpi;
1440 regs->vpd.vnat = vpd->vnat; 1440 regs->vpd.vnat = vpd->vnat;
1441 regs->vpd.vbnat = vpd->vbnat; 1441 regs->vpd.vbnat = vpd->vbnat;
1442 regs->vpd.vpsr = vpd->vpsr; 1442 regs->vpd.vpsr = vpd->vpsr;
1443 regs->vpd.vpr = vpd->vpr; 1443 regs->vpd.vpr = vpd->vpr;
1444 1444
1445 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context)); 1445 memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
1446 1446
1447 SAVE_REGS(mp_state); 1447 SAVE_REGS(mp_state);
1448 SAVE_REGS(vmm_rr); 1448 SAVE_REGS(vmm_rr);
1449 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); 1449 memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
1450 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); 1450 memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
1451 SAVE_REGS(itr_regions); 1451 SAVE_REGS(itr_regions);
1452 SAVE_REGS(dtr_regions); 1452 SAVE_REGS(dtr_regions);
1453 SAVE_REGS(tc_regions); 1453 SAVE_REGS(tc_regions);
1454 SAVE_REGS(irq_check); 1454 SAVE_REGS(irq_check);
1455 SAVE_REGS(itc_check); 1455 SAVE_REGS(itc_check);
1456 SAVE_REGS(timer_check); 1456 SAVE_REGS(timer_check);
1457 SAVE_REGS(timer_pending); 1457 SAVE_REGS(timer_pending);
1458 SAVE_REGS(last_itc); 1458 SAVE_REGS(last_itc);
1459 for (i = 0; i < 8; i++) { 1459 for (i = 0; i < 8; i++) {
1460 regs->vrr[i] = vcpu->arch.vrr[i]; 1460 regs->vrr[i] = vcpu->arch.vrr[i];
1461 regs->ibr[i] = vcpu->arch.ibr[i]; 1461 regs->ibr[i] = vcpu->arch.ibr[i];
1462 regs->dbr[i] = vcpu->arch.dbr[i]; 1462 regs->dbr[i] = vcpu->arch.dbr[i];
1463 } 1463 }
1464 for (i = 0; i < 4; i++) 1464 for (i = 0; i < 4; i++)
1465 regs->insvc[i] = vcpu->arch.insvc[i]; 1465 regs->insvc[i] = vcpu->arch.insvc[i];
1466 regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); 1466 regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
1467 SAVE_REGS(xtp); 1467 SAVE_REGS(xtp);
1468 SAVE_REGS(metaphysical_rr0); 1468 SAVE_REGS(metaphysical_rr0);
1469 SAVE_REGS(metaphysical_rr4); 1469 SAVE_REGS(metaphysical_rr4);
1470 SAVE_REGS(metaphysical_saved_rr0); 1470 SAVE_REGS(metaphysical_saved_rr0);
1471 SAVE_REGS(metaphysical_saved_rr4); 1471 SAVE_REGS(metaphysical_saved_rr4);
1472 SAVE_REGS(fp_psr); 1472 SAVE_REGS(fp_psr);
1473 SAVE_REGS(saved_gp); 1473 SAVE_REGS(saved_gp);
1474 1474
1475 vcpu_put(vcpu); 1475 vcpu_put(vcpu);
1476 return 0; 1476 return 0;
1477 } 1477 }
1478 1478
1479 int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu, 1479 int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
1480 struct kvm_ia64_vcpu_stack *stack) 1480 struct kvm_ia64_vcpu_stack *stack)
1481 { 1481 {
1482 memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack)); 1482 memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
1483 return 0; 1483 return 0;
1484 } 1484 }
1485 1485
1486 int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu, 1486 int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
1487 struct kvm_ia64_vcpu_stack *stack) 1487 struct kvm_ia64_vcpu_stack *stack)
1488 { 1488 {
1489 memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu), 1489 memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
1490 sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu)); 1490 sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
1491 1491
1492 vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data; 1492 vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
1493 return 0; 1493 return 0;
1494 } 1494 }
1495 1495
1496 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 1496 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
1497 { 1497 {
1498 1498
1499 hrtimer_cancel(&vcpu->arch.hlt_timer); 1499 hrtimer_cancel(&vcpu->arch.hlt_timer);
1500 kfree(vcpu->arch.apic); 1500 kfree(vcpu->arch.apic);
1501 } 1501 }
1502 1502
1503 1503
1504 long kvm_arch_vcpu_ioctl(struct file *filp, 1504 long kvm_arch_vcpu_ioctl(struct file *filp,
1505 unsigned int ioctl, unsigned long arg) 1505 unsigned int ioctl, unsigned long arg)
1506 { 1506 {
1507 struct kvm_vcpu *vcpu = filp->private_data; 1507 struct kvm_vcpu *vcpu = filp->private_data;
1508 void __user *argp = (void __user *)arg; 1508 void __user *argp = (void __user *)arg;
1509 struct kvm_ia64_vcpu_stack *stack = NULL; 1509 struct kvm_ia64_vcpu_stack *stack = NULL;
1510 long r; 1510 long r;
1511 1511
1512 switch (ioctl) { 1512 switch (ioctl) {
1513 case KVM_IA64_VCPU_GET_STACK: { 1513 case KVM_IA64_VCPU_GET_STACK: {
1514 struct kvm_ia64_vcpu_stack __user *user_stack; 1514 struct kvm_ia64_vcpu_stack __user *user_stack;
1515 void __user *first_p = argp; 1515 void __user *first_p = argp;
1516 1516
1517 r = -EFAULT; 1517 r = -EFAULT;
1518 if (copy_from_user(&user_stack, first_p, sizeof(void *))) 1518 if (copy_from_user(&user_stack, first_p, sizeof(void *)))
1519 goto out; 1519 goto out;
1520 1520
1521 if (!access_ok(VERIFY_WRITE, user_stack, 1521 if (!access_ok(VERIFY_WRITE, user_stack,
1522 sizeof(struct kvm_ia64_vcpu_stack))) { 1522 sizeof(struct kvm_ia64_vcpu_stack))) {
1523 printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: " 1523 printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
1524 "Illegal user destination address for stack\n"); 1524 "Illegal user destination address for stack\n");
1525 goto out; 1525 goto out;
1526 } 1526 }
1527 stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); 1527 stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
1528 if (!stack) { 1528 if (!stack) {
1529 r = -ENOMEM; 1529 r = -ENOMEM;
1530 goto out; 1530 goto out;
1531 } 1531 }
1532 1532
1533 r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack); 1533 r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
1534 if (r) 1534 if (r)
1535 goto out; 1535 goto out;
1536 1536
1537 if (copy_to_user(user_stack, stack, 1537 if (copy_to_user(user_stack, stack,
1538 sizeof(struct kvm_ia64_vcpu_stack))) 1538 sizeof(struct kvm_ia64_vcpu_stack)))
1539 goto out; 1539 goto out;
1540 1540
1541 break; 1541 break;
1542 } 1542 }
1543 case KVM_IA64_VCPU_SET_STACK: { 1543 case KVM_IA64_VCPU_SET_STACK: {
1544 struct kvm_ia64_vcpu_stack __user *user_stack; 1544 struct kvm_ia64_vcpu_stack __user *user_stack;
1545 void __user *first_p = argp; 1545 void __user *first_p = argp;
1546 1546
1547 r = -EFAULT; 1547 r = -EFAULT;
1548 if (copy_from_user(&user_stack, first_p, sizeof(void *))) 1548 if (copy_from_user(&user_stack, first_p, sizeof(void *)))
1549 goto out; 1549 goto out;
1550 1550
1551 if (!access_ok(VERIFY_READ, user_stack, 1551 if (!access_ok(VERIFY_READ, user_stack,
1552 sizeof(struct kvm_ia64_vcpu_stack))) { 1552 sizeof(struct kvm_ia64_vcpu_stack))) {
1553 printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: " 1553 printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
1554 "Illegal user address for stack\n"); 1554 "Illegal user address for stack\n");
1555 goto out; 1555 goto out;
1556 } 1556 }
1557 stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL); 1557 stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
1558 if (!stack) { 1558 if (!stack) {
1559 r = -ENOMEM; 1559 r = -ENOMEM;
1560 goto out; 1560 goto out;
1561 } 1561 }
1562 if (copy_from_user(stack, user_stack, 1562 if (copy_from_user(stack, user_stack,
1563 sizeof(struct kvm_ia64_vcpu_stack))) 1563 sizeof(struct kvm_ia64_vcpu_stack)))
1564 goto out; 1564 goto out;
1565 1565
1566 r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack); 1566 r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
1567 break; 1567 break;
1568 } 1568 }
1569 1569
1570 default: 1570 default:
1571 r = -EINVAL; 1571 r = -EINVAL;
1572 } 1572 }
1573 1573
1574 out: 1574 out:
1575 kfree(stack); 1575 kfree(stack);
1576 return r; 1576 return r;
1577 } 1577 }
1578 1578
1579 int kvm_arch_prepare_memory_region(struct kvm *kvm, 1579 int kvm_arch_prepare_memory_region(struct kvm *kvm,
1580 struct kvm_memory_slot *memslot, 1580 struct kvm_memory_slot *memslot,
1581 struct kvm_memory_slot old, 1581 struct kvm_memory_slot old,
1582 struct kvm_userspace_memory_region *mem, 1582 struct kvm_userspace_memory_region *mem,
1583 int user_alloc) 1583 int user_alloc)
1584 { 1584 {
1585 unsigned long i; 1585 unsigned long i;
1586 unsigned long pfn; 1586 unsigned long pfn;
1587 int npages = memslot->npages; 1587 int npages = memslot->npages;
1588 unsigned long base_gfn = memslot->base_gfn; 1588 unsigned long base_gfn = memslot->base_gfn;
1589 1589
1590 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) 1590 if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
1591 return -ENOMEM; 1591 return -ENOMEM;
1592 1592
1593 for (i = 0; i < npages; i++) { 1593 for (i = 0; i < npages; i++) {
1594 pfn = gfn_to_pfn(kvm, base_gfn + i); 1594 pfn = gfn_to_pfn(kvm, base_gfn + i);
1595 if (!kvm_is_mmio_pfn(pfn)) { 1595 if (!kvm_is_mmio_pfn(pfn)) {
1596 kvm_set_pmt_entry(kvm, base_gfn + i, 1596 kvm_set_pmt_entry(kvm, base_gfn + i,
1597 pfn << PAGE_SHIFT, 1597 pfn << PAGE_SHIFT,
1598 _PAGE_AR_RWX | _PAGE_MA_WB); 1598 _PAGE_AR_RWX | _PAGE_MA_WB);
1599 memslot->rmap[i] = (unsigned long)pfn_to_page(pfn); 1599 memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
1600 } else { 1600 } else {
1601 kvm_set_pmt_entry(kvm, base_gfn + i, 1601 kvm_set_pmt_entry(kvm, base_gfn + i,
1602 GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT), 1602 GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
1603 _PAGE_MA_UC); 1603 _PAGE_MA_UC);
1604 memslot->rmap[i] = 0; 1604 memslot->rmap[i] = 0;
1605 } 1605 }
1606 } 1606 }
1607 1607
1608 return 0; 1608 return 0;
1609 } 1609 }
1610 1610
1611 void kvm_arch_commit_memory_region(struct kvm *kvm, 1611 void kvm_arch_commit_memory_region(struct kvm *kvm,
1612 struct kvm_userspace_memory_region *mem, 1612 struct kvm_userspace_memory_region *mem,
1613 struct kvm_memory_slot old, 1613 struct kvm_memory_slot old,
1614 int user_alloc) 1614 int user_alloc)
1615 { 1615 {
1616 return; 1616 return;
1617 } 1617 }
1618 1618
1619 void kvm_arch_flush_shadow(struct kvm *kvm) 1619 void kvm_arch_flush_shadow(struct kvm *kvm)
1620 { 1620 {
1621 kvm_flush_remote_tlbs(kvm); 1621 kvm_flush_remote_tlbs(kvm);
1622 } 1622 }
1623 1623
1624 long kvm_arch_dev_ioctl(struct file *filp, 1624 long kvm_arch_dev_ioctl(struct file *filp,
1625 unsigned int ioctl, unsigned long arg) 1625 unsigned int ioctl, unsigned long arg)
1626 { 1626 {
1627 return -EINVAL; 1627 return -EINVAL;
1628 } 1628 }
1629 1629
1630 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 1630 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1631 { 1631 {
1632 kvm_vcpu_uninit(vcpu); 1632 kvm_vcpu_uninit(vcpu);
1633 } 1633 }
1634 1634
1635 static int vti_cpu_has_kvm_support(void) 1635 static int vti_cpu_has_kvm_support(void)
1636 { 1636 {
1637 long avail = 1, status = 1, control = 1; 1637 long avail = 1, status = 1, control = 1;
1638 long ret; 1638 long ret;
1639 1639
1640 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); 1640 ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
1641 if (ret) 1641 if (ret)
1642 goto out; 1642 goto out;
1643 1643
1644 if (!(avail & PAL_PROC_VM_BIT)) 1644 if (!(avail & PAL_PROC_VM_BIT))
1645 goto out; 1645 goto out;
1646 1646
1647 printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); 1647 printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
1648 1648
1649 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); 1649 ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
1650 if (ret) 1650 if (ret)
1651 goto out; 1651 goto out;
1652 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); 1652 printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
1653 1653
1654 if (!(vp_env_info & VP_OPCODE)) { 1654 if (!(vp_env_info & VP_OPCODE)) {
1655 printk(KERN_WARNING"kvm: No opcode ability on hardware, " 1655 printk(KERN_WARNING"kvm: No opcode ability on hardware, "
1656 "vm_env_info:0x%lx\n", vp_env_info); 1656 "vm_env_info:0x%lx\n", vp_env_info);
1657 } 1657 }
1658 1658
1659 return 1; 1659 return 1;
1660 out: 1660 out:
1661 return 0; 1661 return 0;
1662 } 1662 }
1663 1663
1664 1664
1665 /* 1665 /*
1666 * On SN2, the ITC isn't stable, so copy in fast path code to use the 1666 * On SN2, the ITC isn't stable, so copy in fast path code to use the
1667 * SN2 RTC, replacing the ITC based default version. 1667 * SN2 RTC, replacing the ITC based default version.
1668 */ 1668 */
1669 static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, 1669 static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
1670 struct module *module) 1670 struct module *module)
1671 { 1671 {
1672 unsigned long new_ar, new_ar_sn2; 1672 unsigned long new_ar, new_ar_sn2;
1673 unsigned long module_base; 1673 unsigned long module_base;
1674 1674
1675 if (!ia64_platform_is("sn2")) 1675 if (!ia64_platform_is("sn2"))
1676 return; 1676 return;
1677 1677
1678 module_base = (unsigned long)module->module_core; 1678 module_base = (unsigned long)module->module_core;
1679 1679
1680 new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; 1680 new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
1681 new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; 1681 new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
1682 1682
1683 printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " 1683 printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
1684 "as source\n"); 1684 "as source\n");
1685 1685
1686 /* 1686 /*
1687 * Copy the SN2 version of mov_ar into place. They are both 1687 * Copy the SN2 version of mov_ar into place. They are both
1688 * the same size, so 6 bundles is sufficient (6 * 0x10). 1688 * the same size, so 6 bundles is sufficient (6 * 0x10).
1689 */ 1689 */
1690 memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); 1690 memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
1691 } 1691 }
1692 1692
1693 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, 1693 static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
1694 struct module *module) 1694 struct module *module)
1695 { 1695 {
1696 unsigned long module_base; 1696 unsigned long module_base;
1697 unsigned long vmm_size; 1697 unsigned long vmm_size;
1698 1698
1699 unsigned long vmm_offset, func_offset, fdesc_offset; 1699 unsigned long vmm_offset, func_offset, fdesc_offset;
1700 struct fdesc *p_fdesc; 1700 struct fdesc *p_fdesc;
1701 1701
1702 BUG_ON(!module); 1702 BUG_ON(!module);
1703 1703
1704 if (!kvm_vmm_base) { 1704 if (!kvm_vmm_base) {
1705 printk("kvm: kvm area hasn't been initilized yet!!\n"); 1705 printk("kvm: kvm area hasn't been initilized yet!!\n");
1706 return -EFAULT; 1706 return -EFAULT;
1707 } 1707 }
1708 1708
1709 /*Calculate new position of relocated vmm module.*/ 1709 /*Calculate new position of relocated vmm module.*/
1710 module_base = (unsigned long)module->module_core; 1710 module_base = (unsigned long)module->module_core;
1711 vmm_size = module->core_size; 1711 vmm_size = module->core_size;
1712 if (unlikely(vmm_size > KVM_VMM_SIZE)) 1712 if (unlikely(vmm_size > KVM_VMM_SIZE))
1713 return -EFAULT; 1713 return -EFAULT;
1714 1714
1715 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); 1715 memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
1716 kvm_patch_vmm(vmm_info, module); 1716 kvm_patch_vmm(vmm_info, module);
1717 kvm_flush_icache(kvm_vmm_base, vmm_size); 1717 kvm_flush_icache(kvm_vmm_base, vmm_size);
1718 1718
1719 /*Recalculate kvm_vmm_info based on new VMM*/ 1719 /*Recalculate kvm_vmm_info based on new VMM*/
1720 vmm_offset = vmm_info->vmm_ivt - module_base; 1720 vmm_offset = vmm_info->vmm_ivt - module_base;
1721 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; 1721 kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
1722 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", 1722 printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
1723 kvm_vmm_info->vmm_ivt); 1723 kvm_vmm_info->vmm_ivt);
1724 1724
1725 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; 1725 fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
1726 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + 1726 kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
1727 fdesc_offset); 1727 fdesc_offset);
1728 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; 1728 func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
1729 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); 1729 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1730 p_fdesc->ip = KVM_VMM_BASE + func_offset; 1730 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1731 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); 1731 p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
1732 1732
1733 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", 1733 printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
1734 KVM_VMM_BASE+func_offset); 1734 KVM_VMM_BASE+func_offset);
1735 1735
1736 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; 1736 fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
1737 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + 1737 kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
1738 fdesc_offset); 1738 fdesc_offset);
1739 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; 1739 func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
1740 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); 1740 p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
1741 p_fdesc->ip = KVM_VMM_BASE + func_offset; 1741 p_fdesc->ip = KVM_VMM_BASE + func_offset;
1742 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); 1742 p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
1743 1743
1744 kvm_vmm_gp = p_fdesc->gp; 1744 kvm_vmm_gp = p_fdesc->gp;
1745 1745
1746 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", 1746 printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
1747 kvm_vmm_info->vmm_entry); 1747 kvm_vmm_info->vmm_entry);
1748 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", 1748 printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
1749 KVM_VMM_BASE + func_offset); 1749 KVM_VMM_BASE + func_offset);
1750 1750
1751 return 0; 1751 return 0;
1752 } 1752 }
1753 1753
1754 int kvm_arch_init(void *opaque) 1754 int kvm_arch_init(void *opaque)
1755 { 1755 {
1756 int r; 1756 int r;
1757 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; 1757 struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
1758 1758
1759 if (!vti_cpu_has_kvm_support()) { 1759 if (!vti_cpu_has_kvm_support()) {
1760 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); 1760 printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
1761 r = -EOPNOTSUPP; 1761 r = -EOPNOTSUPP;
1762 goto out; 1762 goto out;
1763 } 1763 }
1764 1764
1765 if (kvm_vmm_info) { 1765 if (kvm_vmm_info) {
1766 printk(KERN_ERR "kvm: Already loaded VMM module!\n"); 1766 printk(KERN_ERR "kvm: Already loaded VMM module!\n");
1767 r = -EEXIST; 1767 r = -EEXIST;
1768 goto out; 1768 goto out;
1769 } 1769 }
1770 1770
1771 r = -ENOMEM; 1771 r = -ENOMEM;
1772 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); 1772 kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
1773 if (!kvm_vmm_info) 1773 if (!kvm_vmm_info)
1774 goto out; 1774 goto out;
1775 1775
1776 if (kvm_alloc_vmm_area()) 1776 if (kvm_alloc_vmm_area())
1777 goto out_free0; 1777 goto out_free0;
1778 1778
1779 r = kvm_relocate_vmm(vmm_info, vmm_info->module); 1779 r = kvm_relocate_vmm(vmm_info, vmm_info->module);
1780 if (r) 1780 if (r)
1781 goto out_free1; 1781 goto out_free1;
1782 1782
1783 return 0; 1783 return 0;
1784 1784
1785 out_free1: 1785 out_free1:
1786 kvm_free_vmm_area(); 1786 kvm_free_vmm_area();
1787 out_free0: 1787 out_free0:
1788 kfree(kvm_vmm_info); 1788 kfree(kvm_vmm_info);
1789 out: 1789 out:
1790 return r; 1790 return r;
1791 } 1791 }
1792 1792
1793 void kvm_arch_exit(void) 1793 void kvm_arch_exit(void)
1794 { 1794 {
1795 kvm_free_vmm_area(); 1795 kvm_free_vmm_area();
1796 kfree(kvm_vmm_info); 1796 kfree(kvm_vmm_info);
1797 kvm_vmm_info = NULL; 1797 kvm_vmm_info = NULL;
1798 } 1798 }
1799 1799
1800 static int kvm_ia64_sync_dirty_log(struct kvm *kvm, 1800 static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1801 struct kvm_dirty_log *log) 1801 struct kvm_dirty_log *log)
1802 { 1802 {
1803 struct kvm_memory_slot *memslot; 1803 struct kvm_memory_slot *memslot;
1804 int r, i; 1804 int r, i;
1805 long n, base; 1805 long base;
1806 unsigned long n;
1806 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + 1807 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1807 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); 1808 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1808 1809
1809 r = -EINVAL; 1810 r = -EINVAL;
1810 if (log->slot >= KVM_MEMORY_SLOTS) 1811 if (log->slot >= KVM_MEMORY_SLOTS)
1811 goto out; 1812 goto out;
1812 1813
1813 memslot = &kvm->memslots->memslots[log->slot]; 1814 memslot = &kvm->memslots->memslots[log->slot];
1814 r = -ENOENT; 1815 r = -ENOENT;
1815 if (!memslot->dirty_bitmap) 1816 if (!memslot->dirty_bitmap)
1816 goto out; 1817 goto out;
1817 1818
1818 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1819 n = kvm_dirty_bitmap_bytes(memslot);
1819 base = memslot->base_gfn / BITS_PER_LONG; 1820 base = memslot->base_gfn / BITS_PER_LONG;
1820 1821
1821 for (i = 0; i < n/sizeof(long); ++i) { 1822 for (i = 0; i < n/sizeof(long); ++i) {
1822 memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; 1823 memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
1823 dirty_bitmap[base + i] = 0; 1824 dirty_bitmap[base + i] = 0;
1824 } 1825 }
1825 r = 0; 1826 r = 0;
1826 out: 1827 out:
1827 return r; 1828 return r;
1828 } 1829 }
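
The hunk above swaps the open-coded ALIGN(memslot->npages, BITS_PER_LONG) / 8 for the new kvm_dirty_bitmap_bytes() helper and widens n to unsigned long. The helper itself is not defined in this file; the following is only a minimal sketch of such a wrapper, assuming it simply centralizes the byte-count expression the old code computed inline (struct kvm_memory_slot, ALIGN and BITS_PER_LONG come from the usual KVM/kernel headers):

static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
	/* one dirty bit per guest page, rounded up to a whole number of longs */
	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}

Because the sketch returns unsigned long, callers such as kvm_ia64_sync_dirty_log() above and kvm_vm_ioctl_get_dirty_log() below can keep the size in a type that cannot truncate on 64-bit hosts.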
1829 1830
1830 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1831 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1831 struct kvm_dirty_log *log) 1832 struct kvm_dirty_log *log)
1832 { 1833 {
1833 int r; 1834 int r;
1834 int n; 1835 unsigned long n;
1835 struct kvm_memory_slot *memslot; 1836 struct kvm_memory_slot *memslot;
1836 int is_dirty = 0; 1837 int is_dirty = 0;
1837 1838
1838 mutex_lock(&kvm->slots_lock); 1839 mutex_lock(&kvm->slots_lock);
1839 spin_lock(&kvm->arch.dirty_log_lock); 1840 spin_lock(&kvm->arch.dirty_log_lock);
1840 1841
1841 r = kvm_ia64_sync_dirty_log(kvm, log); 1842 r = kvm_ia64_sync_dirty_log(kvm, log);
1842 if (r) 1843 if (r)
1843 goto out; 1844 goto out;
1844 1845
1845 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1846 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1846 if (r) 1847 if (r)
1847 goto out; 1848 goto out;
1848 1849
1849 /* If nothing is dirty, don't bother messing with page tables. */ 1850 /* If nothing is dirty, don't bother messing with page tables. */
1850 if (is_dirty) { 1851 if (is_dirty) {
1851 kvm_flush_remote_tlbs(kvm); 1852 kvm_flush_remote_tlbs(kvm);
1852 memslot = &kvm->memslots->memslots[log->slot]; 1853 memslot = &kvm->memslots->memslots[log->slot];
1853 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1854 n = kvm_dirty_bitmap_bytes(memslot);
1854 memset(memslot->dirty_bitmap, 0, n); 1855 memset(memslot->dirty_bitmap, 0, n);
1855 } 1856 }
1856 r = 0; 1857 r = 0;
1857 out: 1858 out:
1858 mutex_unlock(&kvm->slots_lock); 1859 mutex_unlock(&kvm->slots_lock);
1859 spin_unlock(&kvm->arch.dirty_log_lock); 1860 spin_unlock(&kvm->arch.dirty_log_lock);
1860 return r; 1861 return r;
1861 } 1862 }
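
The same widening shows up here: n changes from int to unsigned long before it is handed to memset(). A small stand-alone illustration of why an int is too narrow for a byte count derived from an unsigned long page count; the numbers below are hypothetical and are not taken from the patch:

#include <stdio.h>

int main(void)
{
	unsigned long npages = 1UL << 35;                  /* a deliberately huge memslot */
	unsigned long bytes = ((npages + 63) & ~63UL) / 8; /* ALIGN(npages, 64) / 8 on a 64-bit host */
	int as_int = (int)bytes;                           /* implementation-defined once bytes > INT_MAX */

	printf("bytes=%lu as_int=%d\n", bytes, as_int);
	return 0;
}

On a 64-bit host bytes comes out to 4 GiB worth of bitmap while the int copy is silently truncated, which is the class of overflow the unsigned long type and the kvm_dirty_bitmap_bytes() wrapper are meant to rule out.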
1862 1863
1863 int kvm_arch_hardware_setup(void) 1864 int kvm_arch_hardware_setup(void)
1864 { 1865 {
1865 return 0; 1866 return 0;
1866 } 1867 }
1867 1868
1868 void kvm_arch_hardware_unsetup(void) 1869 void kvm_arch_hardware_unsetup(void)
1869 { 1870 {
1870 } 1871 }
1871 1872
1872 void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 1873 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
1873 { 1874 {
1874 int me; 1875 int me;
1875 int cpu = vcpu->cpu; 1876 int cpu = vcpu->cpu;
1876 1877
1877 if (waitqueue_active(&vcpu->wq)) 1878 if (waitqueue_active(&vcpu->wq))
1878 wake_up_interruptible(&vcpu->wq); 1879 wake_up_interruptible(&vcpu->wq);
1879 1880
1880 me = get_cpu(); 1881 me = get_cpu();
1881 if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu)) 1882 if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu))
1882 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) 1883 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
1883 smp_send_reschedule(cpu); 1884 smp_send_reschedule(cpu);
1884 put_cpu(); 1885 put_cpu();
1885 } 1886 }
1886 1887
1887 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) 1888 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
1888 { 1889 {
1889 return __apic_accept_irq(vcpu, irq->vector); 1890 return __apic_accept_irq(vcpu, irq->vector);
1890 } 1891 }
1891 1892
1892 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) 1893 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
1893 { 1894 {
1894 return apic->vcpu->vcpu_id == dest; 1895 return apic->vcpu->vcpu_id == dest;
1895 } 1896 }
1896 1897
1897 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) 1898 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
1898 { 1899 {
1899 return 0; 1900 return 0;
1900 } 1901 }
1901 1902
1902 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) 1903 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1903 { 1904 {
1904 return vcpu1->arch.xtp - vcpu2->arch.xtp; 1905 return vcpu1->arch.xtp - vcpu2->arch.xtp;
1905 } 1906 }
1906 1907
1907 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 1908 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1908 int short_hand, int dest, int dest_mode) 1909 int short_hand, int dest, int dest_mode)
1909 { 1910 {
1910 struct kvm_lapic *target = vcpu->arch.apic; 1911 struct kvm_lapic *target = vcpu->arch.apic;
1911 return (dest_mode == 0) ? 1912 return (dest_mode == 0) ?
1912 kvm_apic_match_physical_addr(target, dest) : 1913 kvm_apic_match_physical_addr(target, dest) :
1913 kvm_apic_match_logical_addr(target, dest); 1914 kvm_apic_match_logical_addr(target, dest);
1914 } 1915 }
1915 1916
1916 static int find_highest_bits(int *dat) 1917 static int find_highest_bits(int *dat)
1917 { 1918 {
1918 u32 bits, bitnum; 1919 u32 bits, bitnum;
1919 int i; 1920 int i;
1920 1921
1921 /* loop for all 256 bits */ 1922 /* loop for all 256 bits */
1922 for (i = 7; i >= 0 ; i--) { 1923 for (i = 7; i >= 0 ; i--) {
1923 bits = dat[i]; 1924 bits = dat[i];
1924 if (bits) { 1925 if (bits) {
1925 bitnum = fls(bits); 1926 bitnum = fls(bits);
1926 return i * 32 + bitnum - 1; 1927 return i * 32 + bitnum - 1;
1927 } 1928 }
1928 } 1929 }
1929 1930
1930 return -1; 1931 return -1;
1931 } 1932 }
1932 1933
1933 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) 1934 int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
1934 { 1935 {
1935 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); 1936 struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
1936 1937
1937 if (vpd->irr[0] & (1UL << NMI_VECTOR)) 1938 if (vpd->irr[0] & (1UL << NMI_VECTOR))
1938 return NMI_VECTOR; 1939 return NMI_VECTOR;
1939 if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) 1940 if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
1940 return ExtINT_VECTOR; 1941 return ExtINT_VECTOR;
1941 1942
1942 return find_highest_bits((int *)&vpd->irr[0]); 1943 return find_highest_bits((int *)&vpd->irr[0]);
1943 } 1944 }
1944 1945
1945 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 1946 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
1946 { 1947 {
1947 return vcpu->arch.timer_fired; 1948 return vcpu->arch.timer_fired;
1948 } 1949 }
1949 1950
1950 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 1951 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
1951 { 1952 {
1952 return gfn; 1953 return gfn;
1953 } 1954 }
1954 1955
1955 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 1956 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1956 { 1957 {
1957 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) || 1958 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
1958 (kvm_highest_pending_irq(vcpu) != -1); 1959 (kvm_highest_pending_irq(vcpu) != -1);
1959 } 1960 }
1960 1961
1961 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 1962 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1962 struct kvm_mp_state *mp_state) 1963 struct kvm_mp_state *mp_state)
1963 { 1964 {
1964 vcpu_load(vcpu); 1965 vcpu_load(vcpu);
1965 mp_state->mp_state = vcpu->arch.mp_state; 1966 mp_state->mp_state = vcpu->arch.mp_state;
1966 vcpu_put(vcpu); 1967 vcpu_put(vcpu);
1967 return 0; 1968 return 0;
1968 } 1969 }
1969 1970
1970 static int vcpu_reset(struct kvm_vcpu *vcpu) 1971 static int vcpu_reset(struct kvm_vcpu *vcpu)
1971 { 1972 {
1972 int r; 1973 int r;
1973 long psr; 1974 long psr;
1974 local_irq_save(psr); 1975 local_irq_save(psr);
1975 r = kvm_insert_vmm_mapping(vcpu); 1976 r = kvm_insert_vmm_mapping(vcpu);
1976 local_irq_restore(psr); 1977 local_irq_restore(psr);
1977 if (r) 1978 if (r)
1978 goto fail; 1979 goto fail;
1979 1980
1980 vcpu->arch.launched = 0; 1981 vcpu->arch.launched = 0;
1981 kvm_arch_vcpu_uninit(vcpu); 1982 kvm_arch_vcpu_uninit(vcpu);
1982 r = kvm_arch_vcpu_init(vcpu); 1983 r = kvm_arch_vcpu_init(vcpu);
1983 if (r) 1984 if (r)
1984 goto fail; 1985 goto fail;
1985 1986
1986 kvm_purge_vmm_mapping(vcpu); 1987 kvm_purge_vmm_mapping(vcpu);
1987 r = 0; 1988 r = 0;
1988 fail: 1989 fail:
1989 return r; 1990 return r;
1990 } 1991 }
1991 1992
1992 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 1993 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1993 struct kvm_mp_state *mp_state) 1994 struct kvm_mp_state *mp_state)
1994 { 1995 {
1995 int r = 0; 1996 int r = 0;
1996 1997
1997 vcpu_load(vcpu); 1998 vcpu_load(vcpu);
1998 vcpu->arch.mp_state = mp_state->mp_state; 1999 vcpu->arch.mp_state = mp_state->mp_state;
1999 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) 2000 if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
2000 r = vcpu_reset(vcpu); 2001 r = vcpu_reset(vcpu);
2001 vcpu_put(vcpu); 2002 vcpu_put(vcpu);
2002 return r; 2003 return r;
2003 } 2004 }
2004 2005
arch/powerpc/kvm/book3s.c
1 /* 1 /*
2 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. 2 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
3 * 3 *
4 * Authors: 4 * Authors:
5 * Alexander Graf <agraf@suse.de> 5 * Alexander Graf <agraf@suse.de>
6 * Kevin Wolf <mail@kevin-wolf.de> 6 * Kevin Wolf <mail@kevin-wolf.de>
7 * 7 *
8 * Description: 8 * Description:
9 * This file is derived from arch/powerpc/kvm/44x.c, 9 * This file is derived from arch/powerpc/kvm/44x.c,
10 * by Hollis Blanchard <hollisb@us.ibm.com>. 10 * by Hollis Blanchard <hollisb@us.ibm.com>.
11 * 11 *
12 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License, version 2, as 13 * it under the terms of the GNU General Public License, version 2, as
14 * published by the Free Software Foundation. 14 * published by the Free Software Foundation.
15 */ 15 */
16 16
17 #include <linux/kvm_host.h> 17 #include <linux/kvm_host.h>
18 #include <linux/err.h> 18 #include <linux/err.h>
19 19
20 #include <asm/reg.h> 20 #include <asm/reg.h>
21 #include <asm/cputable.h> 21 #include <asm/cputable.h>
22 #include <asm/cacheflush.h> 22 #include <asm/cacheflush.h>
23 #include <asm/tlbflush.h> 23 #include <asm/tlbflush.h>
24 #include <asm/uaccess.h> 24 #include <asm/uaccess.h>
25 #include <asm/io.h> 25 #include <asm/io.h>
26 #include <asm/kvm_ppc.h> 26 #include <asm/kvm_ppc.h>
27 #include <asm/kvm_book3s.h> 27 #include <asm/kvm_book3s.h>
28 #include <asm/mmu_context.h> 28 #include <asm/mmu_context.h>
29 #include <linux/gfp.h> 29 #include <linux/gfp.h>
30 #include <linux/sched.h> 30 #include <linux/sched.h>
31 #include <linux/vmalloc.h> 31 #include <linux/vmalloc.h>
32 32
33 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 33 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
34 34
35 /* #define EXIT_DEBUG */ 35 /* #define EXIT_DEBUG */
36 /* #define EXIT_DEBUG_SIMPLE */ 36 /* #define EXIT_DEBUG_SIMPLE */
37 /* #define DEBUG_EXT */ 37 /* #define DEBUG_EXT */
38 38
39 static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 39 static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
40 40
41 struct kvm_stats_debugfs_item debugfs_entries[] = { 41 struct kvm_stats_debugfs_item debugfs_entries[] = {
42 { "exits", VCPU_STAT(sum_exits) }, 42 { "exits", VCPU_STAT(sum_exits) },
43 { "mmio", VCPU_STAT(mmio_exits) }, 43 { "mmio", VCPU_STAT(mmio_exits) },
44 { "sig", VCPU_STAT(signal_exits) }, 44 { "sig", VCPU_STAT(signal_exits) },
45 { "sysc", VCPU_STAT(syscall_exits) }, 45 { "sysc", VCPU_STAT(syscall_exits) },
46 { "inst_emu", VCPU_STAT(emulated_inst_exits) }, 46 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
47 { "dec", VCPU_STAT(dec_exits) }, 47 { "dec", VCPU_STAT(dec_exits) },
48 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 48 { "ext_intr", VCPU_STAT(ext_intr_exits) },
49 { "queue_intr", VCPU_STAT(queue_intr) }, 49 { "queue_intr", VCPU_STAT(queue_intr) },
50 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 50 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
51 { "pf_storage", VCPU_STAT(pf_storage) }, 51 { "pf_storage", VCPU_STAT(pf_storage) },
52 { "sp_storage", VCPU_STAT(sp_storage) }, 52 { "sp_storage", VCPU_STAT(sp_storage) },
53 { "pf_instruc", VCPU_STAT(pf_instruc) }, 53 { "pf_instruc", VCPU_STAT(pf_instruc) },
54 { "sp_instruc", VCPU_STAT(sp_instruc) }, 54 { "sp_instruc", VCPU_STAT(sp_instruc) },
55 { "ld", VCPU_STAT(ld) }, 55 { "ld", VCPU_STAT(ld) },
56 { "ld_slow", VCPU_STAT(ld_slow) }, 56 { "ld_slow", VCPU_STAT(ld_slow) },
57 { "st", VCPU_STAT(st) }, 57 { "st", VCPU_STAT(st) },
58 { "st_slow", VCPU_STAT(st_slow) }, 58 { "st_slow", VCPU_STAT(st_slow) },
59 { NULL } 59 { NULL }
60 }; 60 };
61 61
62 void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) 62 void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
63 { 63 {
64 } 64 }
65 65
66 void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) 66 void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
67 { 67 {
68 } 68 }
69 69
70 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 70 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
71 { 71 {
72 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); 72 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
73 memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, 73 memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
74 sizeof(get_paca()->shadow_vcpu)); 74 sizeof(get_paca()->shadow_vcpu));
75 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; 75 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
76 } 76 }
77 77
78 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 78 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
79 { 79 {
80 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); 80 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
81 memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 81 memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
82 sizeof(get_paca()->shadow_vcpu)); 82 sizeof(get_paca()->shadow_vcpu));
83 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; 83 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
84 84
85 kvmppc_giveup_ext(vcpu, MSR_FP); 85 kvmppc_giveup_ext(vcpu, MSR_FP);
86 kvmppc_giveup_ext(vcpu, MSR_VEC); 86 kvmppc_giveup_ext(vcpu, MSR_VEC);
87 kvmppc_giveup_ext(vcpu, MSR_VSX); 87 kvmppc_giveup_ext(vcpu, MSR_VSX);
88 } 88 }
89 89
90 #if defined(EXIT_DEBUG) 90 #if defined(EXIT_DEBUG)
91 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) 91 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
92 { 92 {
93 u64 jd = mftb() - vcpu->arch.dec_jiffies; 93 u64 jd = mftb() - vcpu->arch.dec_jiffies;
94 return vcpu->arch.dec - jd; 94 return vcpu->arch.dec - jd;
95 } 95 }
96 #endif 96 #endif
97 97
98 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 98 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
99 { 99 {
100 vcpu->arch.shadow_msr = vcpu->arch.msr; 100 vcpu->arch.shadow_msr = vcpu->arch.msr;
101 /* Guest MSR values */ 101 /* Guest MSR values */
102 vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | 102 vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
103 MSR_BE | MSR_DE; 103 MSR_BE | MSR_DE;
104 /* Process MSR values */ 104 /* Process MSR values */
105 vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | 105 vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
106 MSR_EE; 106 MSR_EE;
107 /* External providers the guest reserved */ 107 /* External providers the guest reserved */
108 vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); 108 vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
109 /* 64-bit Process MSR values */ 109 /* 64-bit Process MSR values */
110 #ifdef CONFIG_PPC_BOOK3S_64 110 #ifdef CONFIG_PPC_BOOK3S_64
111 vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; 111 vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
112 #endif 112 #endif
113 } 113 }
114 114
115 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 115 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
116 { 116 {
117 ulong old_msr = vcpu->arch.msr; 117 ulong old_msr = vcpu->arch.msr;
118 118
119 #ifdef EXIT_DEBUG 119 #ifdef EXIT_DEBUG
120 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 120 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
121 #endif 121 #endif
122 122
123 msr &= to_book3s(vcpu)->msr_mask; 123 msr &= to_book3s(vcpu)->msr_mask;
124 vcpu->arch.msr = msr; 124 vcpu->arch.msr = msr;
125 kvmppc_recalc_shadow_msr(vcpu); 125 kvmppc_recalc_shadow_msr(vcpu);
126 126
127 if (msr & (MSR_WE|MSR_POW)) { 127 if (msr & (MSR_WE|MSR_POW)) {
128 if (!vcpu->arch.pending_exceptions) { 128 if (!vcpu->arch.pending_exceptions) {
129 kvm_vcpu_block(vcpu); 129 kvm_vcpu_block(vcpu);
130 vcpu->stat.halt_wakeup++; 130 vcpu->stat.halt_wakeup++;
131 } 131 }
132 } 132 }
133 133
134 if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || 134 if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) ||
135 (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { 135 (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) {
136 kvmppc_mmu_flush_segments(vcpu); 136 kvmppc_mmu_flush_segments(vcpu);
137 kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); 137 kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
138 } 138 }
139 } 139 }
140 140
141 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 141 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
142 { 142 {
143 vcpu->arch.srr0 = vcpu->arch.pc; 143 vcpu->arch.srr0 = vcpu->arch.pc;
144 vcpu->arch.srr1 = vcpu->arch.msr | flags; 144 vcpu->arch.srr1 = vcpu->arch.msr | flags;
145 vcpu->arch.pc = to_book3s(vcpu)->hior + vec; 145 vcpu->arch.pc = to_book3s(vcpu)->hior + vec;
146 vcpu->arch.mmu.reset_msr(vcpu); 146 vcpu->arch.mmu.reset_msr(vcpu);
147 } 147 }
148 148
149 static int kvmppc_book3s_vec2irqprio(unsigned int vec) 149 static int kvmppc_book3s_vec2irqprio(unsigned int vec)
150 { 150 {
151 unsigned int prio; 151 unsigned int prio;
152 152
153 switch (vec) { 153 switch (vec) {
154 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; 154 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
155 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; 155 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
156 case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break; 156 case 0x300: prio = BOOK3S_IRQPRIO_DATA_STORAGE; break;
157 case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break; 157 case 0x380: prio = BOOK3S_IRQPRIO_DATA_SEGMENT; break;
158 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; 158 case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
159 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; 159 case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
160 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; 160 case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
161 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; 161 case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
162 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; 162 case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
163 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; 163 case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
164 case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break; 164 case 0x900: prio = BOOK3S_IRQPRIO_DECREMENTER; break;
165 case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break; 165 case 0xc00: prio = BOOK3S_IRQPRIO_SYSCALL; break;
166 case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break; 166 case 0xd00: prio = BOOK3S_IRQPRIO_DEBUG; break;
167 case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break; 167 case 0xf20: prio = BOOK3S_IRQPRIO_ALTIVEC; break;
168 case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break; 168 case 0xf40: prio = BOOK3S_IRQPRIO_VSX; break;
169 default: prio = BOOK3S_IRQPRIO_MAX; break; 169 default: prio = BOOK3S_IRQPRIO_MAX; break;
170 } 170 }
171 171
172 return prio; 172 return prio;
173 } 173 }
174 174
175 static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, 175 static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
176 unsigned int vec) 176 unsigned int vec)
177 { 177 {
178 clear_bit(kvmppc_book3s_vec2irqprio(vec), 178 clear_bit(kvmppc_book3s_vec2irqprio(vec),
179 &vcpu->arch.pending_exceptions); 179 &vcpu->arch.pending_exceptions);
180 } 180 }
181 181
182 void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 182 void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
183 { 183 {
184 vcpu->stat.queue_intr++; 184 vcpu->stat.queue_intr++;
185 185
186 set_bit(kvmppc_book3s_vec2irqprio(vec), 186 set_bit(kvmppc_book3s_vec2irqprio(vec),
187 &vcpu->arch.pending_exceptions); 187 &vcpu->arch.pending_exceptions);
188 #ifdef EXIT_DEBUG 188 #ifdef EXIT_DEBUG
189 printk(KERN_INFO "Queueing interrupt %x\n", vec); 189 printk(KERN_INFO "Queueing interrupt %x\n", vec);
190 #endif 190 #endif
191 } 191 }
192 192
193 193
194 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) 194 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
195 { 195 {
196 to_book3s(vcpu)->prog_flags = flags; 196 to_book3s(vcpu)->prog_flags = flags;
197 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); 197 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
198 } 198 }
199 199
200 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) 200 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
201 { 201 {
202 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 202 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
203 } 203 }
204 204
205 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) 205 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
206 { 206 {
207 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); 207 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
208 } 208 }
209 209
210 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) 210 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
211 { 211 {
212 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); 212 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
213 } 213 }
214 214
215 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 215 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
216 struct kvm_interrupt *irq) 216 struct kvm_interrupt *irq)
217 { 217 {
218 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 218 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
219 } 219 }
220 220
221 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) 221 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
222 { 222 {
223 int deliver = 1; 223 int deliver = 1;
224 int vec = 0; 224 int vec = 0;
225 ulong flags = 0ULL; 225 ulong flags = 0ULL;
226 226
227 switch (priority) { 227 switch (priority) {
228 case BOOK3S_IRQPRIO_DECREMENTER: 228 case BOOK3S_IRQPRIO_DECREMENTER:
229 deliver = vcpu->arch.msr & MSR_EE; 229 deliver = vcpu->arch.msr & MSR_EE;
230 vec = BOOK3S_INTERRUPT_DECREMENTER; 230 vec = BOOK3S_INTERRUPT_DECREMENTER;
231 break; 231 break;
232 case BOOK3S_IRQPRIO_EXTERNAL: 232 case BOOK3S_IRQPRIO_EXTERNAL:
233 deliver = vcpu->arch.msr & MSR_EE; 233 deliver = vcpu->arch.msr & MSR_EE;
234 vec = BOOK3S_INTERRUPT_EXTERNAL; 234 vec = BOOK3S_INTERRUPT_EXTERNAL;
235 break; 235 break;
236 case BOOK3S_IRQPRIO_SYSTEM_RESET: 236 case BOOK3S_IRQPRIO_SYSTEM_RESET:
237 vec = BOOK3S_INTERRUPT_SYSTEM_RESET; 237 vec = BOOK3S_INTERRUPT_SYSTEM_RESET;
238 break; 238 break;
239 case BOOK3S_IRQPRIO_MACHINE_CHECK: 239 case BOOK3S_IRQPRIO_MACHINE_CHECK:
240 vec = BOOK3S_INTERRUPT_MACHINE_CHECK; 240 vec = BOOK3S_INTERRUPT_MACHINE_CHECK;
241 break; 241 break;
242 case BOOK3S_IRQPRIO_DATA_STORAGE: 242 case BOOK3S_IRQPRIO_DATA_STORAGE:
243 vec = BOOK3S_INTERRUPT_DATA_STORAGE; 243 vec = BOOK3S_INTERRUPT_DATA_STORAGE;
244 break; 244 break;
245 case BOOK3S_IRQPRIO_INST_STORAGE: 245 case BOOK3S_IRQPRIO_INST_STORAGE:
246 vec = BOOK3S_INTERRUPT_INST_STORAGE; 246 vec = BOOK3S_INTERRUPT_INST_STORAGE;
247 break; 247 break;
248 case BOOK3S_IRQPRIO_DATA_SEGMENT: 248 case BOOK3S_IRQPRIO_DATA_SEGMENT:
249 vec = BOOK3S_INTERRUPT_DATA_SEGMENT; 249 vec = BOOK3S_INTERRUPT_DATA_SEGMENT;
250 break; 250 break;
251 case BOOK3S_IRQPRIO_INST_SEGMENT: 251 case BOOK3S_IRQPRIO_INST_SEGMENT:
252 vec = BOOK3S_INTERRUPT_INST_SEGMENT; 252 vec = BOOK3S_INTERRUPT_INST_SEGMENT;
253 break; 253 break;
254 case BOOK3S_IRQPRIO_ALIGNMENT: 254 case BOOK3S_IRQPRIO_ALIGNMENT:
255 vec = BOOK3S_INTERRUPT_ALIGNMENT; 255 vec = BOOK3S_INTERRUPT_ALIGNMENT;
256 break; 256 break;
257 case BOOK3S_IRQPRIO_PROGRAM: 257 case BOOK3S_IRQPRIO_PROGRAM:
258 vec = BOOK3S_INTERRUPT_PROGRAM; 258 vec = BOOK3S_INTERRUPT_PROGRAM;
259 flags = to_book3s(vcpu)->prog_flags; 259 flags = to_book3s(vcpu)->prog_flags;
260 break; 260 break;
261 case BOOK3S_IRQPRIO_VSX: 261 case BOOK3S_IRQPRIO_VSX:
262 vec = BOOK3S_INTERRUPT_VSX; 262 vec = BOOK3S_INTERRUPT_VSX;
263 break; 263 break;
264 case BOOK3S_IRQPRIO_ALTIVEC: 264 case BOOK3S_IRQPRIO_ALTIVEC:
265 vec = BOOK3S_INTERRUPT_ALTIVEC; 265 vec = BOOK3S_INTERRUPT_ALTIVEC;
266 break; 266 break;
267 case BOOK3S_IRQPRIO_FP_UNAVAIL: 267 case BOOK3S_IRQPRIO_FP_UNAVAIL:
268 vec = BOOK3S_INTERRUPT_FP_UNAVAIL; 268 vec = BOOK3S_INTERRUPT_FP_UNAVAIL;
269 break; 269 break;
270 case BOOK3S_IRQPRIO_SYSCALL: 270 case BOOK3S_IRQPRIO_SYSCALL:
271 vec = BOOK3S_INTERRUPT_SYSCALL; 271 vec = BOOK3S_INTERRUPT_SYSCALL;
272 break; 272 break;
273 case BOOK3S_IRQPRIO_DEBUG: 273 case BOOK3S_IRQPRIO_DEBUG:
274 vec = BOOK3S_INTERRUPT_TRACE; 274 vec = BOOK3S_INTERRUPT_TRACE;
275 break; 275 break;
276 case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR: 276 case BOOK3S_IRQPRIO_PERFORMANCE_MONITOR:
277 vec = BOOK3S_INTERRUPT_PERFMON; 277 vec = BOOK3S_INTERRUPT_PERFMON;
278 break; 278 break;
279 default: 279 default:
280 deliver = 0; 280 deliver = 0;
281 printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority); 281 printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
282 break; 282 break;
283 } 283 }
284 284
285 #if 0 285 #if 0
286 printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver); 286 printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
287 #endif 287 #endif
288 288
289 if (deliver) 289 if (deliver)
290 kvmppc_inject_interrupt(vcpu, vec, flags); 290 kvmppc_inject_interrupt(vcpu, vec, flags);
291 291
292 return deliver; 292 return deliver;
293 } 293 }
294 294
295 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) 295 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
296 { 296 {
297 unsigned long *pending = &vcpu->arch.pending_exceptions; 297 unsigned long *pending = &vcpu->arch.pending_exceptions;
298 unsigned int priority; 298 unsigned int priority;
299 299
300 #ifdef EXIT_DEBUG 300 #ifdef EXIT_DEBUG
301 if (vcpu->arch.pending_exceptions) 301 if (vcpu->arch.pending_exceptions)
302 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 302 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
303 #endif 303 #endif
304 priority = __ffs(*pending); 304 priority = __ffs(*pending);
305 while (priority <= (sizeof(unsigned int) * 8)) { 305 while (priority <= (sizeof(unsigned int) * 8)) {
306 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && 306 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
307 (priority != BOOK3S_IRQPRIO_DECREMENTER)) { 307 (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
308 /* DEC interrupts get cleared by mtdec */ 308 /* DEC interrupts get cleared by mtdec */
309 clear_bit(priority, &vcpu->arch.pending_exceptions); 309 clear_bit(priority, &vcpu->arch.pending_exceptions);
310 break; 310 break;
311 } 311 }
312 312
313 priority = find_next_bit(pending, 313 priority = find_next_bit(pending,
314 BITS_PER_BYTE * sizeof(*pending), 314 BITS_PER_BYTE * sizeof(*pending),
315 priority + 1); 315 priority + 1);
316 } 316 }
317 } 317 }
318 318
319 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 319 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
320 { 320 {
321 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; 321 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
322 vcpu->arch.pvr = pvr; 322 vcpu->arch.pvr = pvr;
323 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 323 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
324 kvmppc_mmu_book3s_64_init(vcpu); 324 kvmppc_mmu_book3s_64_init(vcpu);
325 to_book3s(vcpu)->hior = 0xfff00000; 325 to_book3s(vcpu)->hior = 0xfff00000;
326 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 326 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
327 } else { 327 } else {
328 kvmppc_mmu_book3s_32_init(vcpu); 328 kvmppc_mmu_book3s_32_init(vcpu);
329 to_book3s(vcpu)->hior = 0; 329 to_book3s(vcpu)->hior = 0;
330 to_book3s(vcpu)->msr_mask = 0xffffffffULL; 330 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
331 } 331 }
332 332
333 /* If we are in hypervisor level on 970, we can tell the CPU to 333 /* If we are in hypervisor level on 970, we can tell the CPU to
334 * treat DCBZ as 32 bytes store */ 334 * treat DCBZ as 32 bytes store */
335 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; 335 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
336 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && 336 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
337 !strcmp(cur_cpu_spec->platform, "ppc970")) 337 !strcmp(cur_cpu_spec->platform, "ppc970"))
338 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 338 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
339 339
340 } 340 }
341 341
342 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To 342 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
343 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to 343 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
344 * emulate 32 bytes dcbz length. 344 * emulate 32 bytes dcbz length.
345 * 345 *
346 * The Book3s_64 inventors also realized this case and implemented a special bit 346 * The Book3s_64 inventors also realized this case and implemented a special bit
347 * in the HID5 register, which is a hypervisor ressource. Thus we can't use it. 347 * in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
348 * 348 *
349 * My approach here is to patch the dcbz instruction on executing pages. 349 * My approach here is to patch the dcbz instruction on executing pages.
350 */ 350 */
351 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 351 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
352 { 352 {
353 bool touched = false; 353 bool touched = false;
354 hva_t hpage; 354 hva_t hpage;
355 u32 *page; 355 u32 *page;
356 int i; 356 int i;
357 357
358 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 358 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
359 if (kvm_is_error_hva(hpage)) 359 if (kvm_is_error_hva(hpage))
360 return; 360 return;
361 361
362 hpage |= pte->raddr & ~PAGE_MASK; 362 hpage |= pte->raddr & ~PAGE_MASK;
363 hpage &= ~0xFFFULL; 363 hpage &= ~0xFFFULL;
364 364
365 page = vmalloc(HW_PAGE_SIZE); 365 page = vmalloc(HW_PAGE_SIZE);
366 366
367 if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) 367 if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
368 goto out; 368 goto out;
369 369
370 for (i=0; i < HW_PAGE_SIZE / 4; i++) 370 for (i=0; i < HW_PAGE_SIZE / 4; i++)
371 if ((page[i] & 0xff0007ff) == INS_DCBZ) { 371 if ((page[i] & 0xff0007ff) == INS_DCBZ) {
372 page[i] &= 0xfffffff7; // reserved instruction, so we trap 372 page[i] &= 0xfffffff7; // reserved instruction, so we trap
373 touched = true; 373 touched = true;
374 } 374 }
375 375
376 if (touched) 376 if (touched)
377 copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); 377 copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);
378 378
379 out: 379 out:
380 vfree(page); 380 vfree(page);
381 } 381 }
382 382
383 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 383 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
384 struct kvmppc_pte *pte) 384 struct kvmppc_pte *pte)
385 { 385 {
386 int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR)); 386 int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
387 int r; 387 int r;
388 388
389 if (relocated) { 389 if (relocated) {
390 r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); 390 r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
391 } else { 391 } else {
392 pte->eaddr = eaddr; 392 pte->eaddr = eaddr;
393 pte->raddr = eaddr & 0xffffffff; 393 pte->raddr = eaddr & 0xffffffff;
394 pte->vpage = eaddr >> 12; 394 pte->vpage = eaddr >> 12;
395 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 395 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
396 case 0: 396 case 0:
397 pte->vpage |= VSID_REAL; 397 pte->vpage |= VSID_REAL;
398 case MSR_DR: 398 case MSR_DR:
399 pte->vpage |= VSID_REAL_DR; 399 pte->vpage |= VSID_REAL_DR;
400 case MSR_IR: 400 case MSR_IR:
401 pte->vpage |= VSID_REAL_IR; 401 pte->vpage |= VSID_REAL_IR;
402 } 402 }
403 pte->may_read = true; 403 pte->may_read = true;
404 pte->may_write = true; 404 pte->may_write = true;
405 pte->may_execute = true; 405 pte->may_execute = true;
406 r = 0; 406 r = 0;
407 } 407 }
408 408
409 return r; 409 return r;
410 } 410 }
411 411
412 static hva_t kvmppc_bad_hva(void) 412 static hva_t kvmppc_bad_hva(void)
413 { 413 {
414 return PAGE_OFFSET; 414 return PAGE_OFFSET;
415 } 415 }
416 416
417 static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, 417 static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
418 bool read) 418 bool read)
419 { 419 {
420 hva_t hpage; 420 hva_t hpage;
421 421
422 if (read && !pte->may_read) 422 if (read && !pte->may_read)
423 goto err; 423 goto err;
424 424
425 if (!read && !pte->may_write) 425 if (!read && !pte->may_write)
426 goto err; 426 goto err;
427 427
428 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 428 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
429 if (kvm_is_error_hva(hpage)) 429 if (kvm_is_error_hva(hpage))
430 goto err; 430 goto err;
431 431
432 return hpage | (pte->raddr & ~PAGE_MASK); 432 return hpage | (pte->raddr & ~PAGE_MASK);
433 err: 433 err:
434 return kvmppc_bad_hva(); 434 return kvmppc_bad_hva();
435 } 435 }
436 436
437 int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) 437 int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr)
438 { 438 {
439 struct kvmppc_pte pte; 439 struct kvmppc_pte pte;
440 hva_t hva = eaddr; 440 hva_t hva = eaddr;
441 441
442 vcpu->stat.st++; 442 vcpu->stat.st++;
443 443
444 if (kvmppc_xlate(vcpu, eaddr, false, &pte)) 444 if (kvmppc_xlate(vcpu, eaddr, false, &pte))
445 goto err; 445 goto err;
446 446
447 hva = kvmppc_pte_to_hva(vcpu, &pte, false); 447 hva = kvmppc_pte_to_hva(vcpu, &pte, false);
448 if (kvm_is_error_hva(hva)) 448 if (kvm_is_error_hva(hva))
449 goto err; 449 goto err;
450 450
451 if (copy_to_user((void __user *)hva, ptr, size)) { 451 if (copy_to_user((void __user *)hva, ptr, size)) {
452 printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); 452 printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva);
453 goto err; 453 goto err;
454 } 454 }
455 455
456 return 0; 456 return 0;
457 457
458 err: 458 err:
459 return -ENOENT; 459 return -ENOENT;
460 } 460 }
461 461
462 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, 462 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr,
463 bool data) 463 bool data)
464 { 464 {
465 struct kvmppc_pte pte; 465 struct kvmppc_pte pte;
466 hva_t hva = eaddr; 466 hva_t hva = eaddr;
467 467
468 vcpu->stat.ld++; 468 vcpu->stat.ld++;
469 469
470 if (kvmppc_xlate(vcpu, eaddr, data, &pte)) 470 if (kvmppc_xlate(vcpu, eaddr, data, &pte))
471 goto err; 471 goto err;
472 472
473 hva = kvmppc_pte_to_hva(vcpu, &pte, true); 473 hva = kvmppc_pte_to_hva(vcpu, &pte, true);
474 if (kvm_is_error_hva(hva)) 474 if (kvm_is_error_hva(hva))
475 goto err; 475 goto err;
476 476
477 if (copy_from_user(ptr, (void __user *)hva, size)) { 477 if (copy_from_user(ptr, (void __user *)hva, size)) {
478 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); 478 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
479 goto err; 479 goto err;
480 } 480 }
481 481
482 return 0; 482 return 0;
483 483
484 err: 484 err:
485 return -ENOENT; 485 return -ENOENT;
486 } 486 }
487 487
488 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) 488 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
489 { 489 {
490 return kvm_is_visible_gfn(vcpu->kvm, gfn); 490 return kvm_is_visible_gfn(vcpu->kvm, gfn);
491 } 491 }
492 492
493 int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, 493 int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
494 ulong eaddr, int vec) 494 ulong eaddr, int vec)
495 { 495 {
496 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); 496 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
497 int r = RESUME_GUEST; 497 int r = RESUME_GUEST;
498 int relocated; 498 int relocated;
499 int page_found = 0; 499 int page_found = 0;
500 struct kvmppc_pte pte; 500 struct kvmppc_pte pte;
501 bool is_mmio = false; 501 bool is_mmio = false;
502 502
503 if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { 503 if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) {
504 relocated = (vcpu->arch.msr & MSR_DR); 504 relocated = (vcpu->arch.msr & MSR_DR);
505 } else { 505 } else {
506 relocated = (vcpu->arch.msr & MSR_IR); 506 relocated = (vcpu->arch.msr & MSR_IR);
507 } 507 }
508 508
509 /* Resolve real address if translation turned on */ 509 /* Resolve real address if translation turned on */
510 if (relocated) { 510 if (relocated) {
511 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data); 511 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
512 } else { 512 } else {
513 pte.may_execute = true; 513 pte.may_execute = true;
514 pte.may_read = true; 514 pte.may_read = true;
515 pte.may_write = true; 515 pte.may_write = true;
516 pte.raddr = eaddr & 0xffffffff; 516 pte.raddr = eaddr & 0xffffffff;
517 pte.eaddr = eaddr; 517 pte.eaddr = eaddr;
518 pte.vpage = eaddr >> 12; 518 pte.vpage = eaddr >> 12;
519 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 519 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
520 case 0: 520 case 0:
521 pte.vpage |= VSID_REAL; 521 pte.vpage |= VSID_REAL;
522 case MSR_DR: 522 case MSR_DR:
523 pte.vpage |= VSID_REAL_DR; 523 pte.vpage |= VSID_REAL_DR;
524 case MSR_IR: 524 case MSR_IR:
525 pte.vpage |= VSID_REAL_IR; 525 pte.vpage |= VSID_REAL_IR;
526 } 526 }
527 } 527 }
528 528
529 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 529 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
530 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 530 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
531 /* 531 /*
532 * If we do the dcbz hack, we have to NX on every execution, 532 * If we do the dcbz hack, we have to NX on every execution,
533 * so we can patch the executing code. This renders our guest 533 * so we can patch the executing code. This renders our guest
534 * NX-less. 534 * NX-less.
535 */ 535 */
536 pte.may_execute = !data; 536 pte.may_execute = !data;
537 } 537 }
538 538
539 if (page_found == -ENOENT) { 539 if (page_found == -ENOENT) {
540 /* Page not found in guest PTE entries */ 540 /* Page not found in guest PTE entries */
541 vcpu->arch.dear = vcpu->arch.fault_dear; 541 vcpu->arch.dear = vcpu->arch.fault_dear;
542 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 542 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
543 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 543 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
544 kvmppc_book3s_queue_irqprio(vcpu, vec); 544 kvmppc_book3s_queue_irqprio(vcpu, vec);
545 } else if (page_found == -EPERM) { 545 } else if (page_found == -EPERM) {
546 /* Storage protection */ 546 /* Storage protection */
547 vcpu->arch.dear = vcpu->arch.fault_dear; 547 vcpu->arch.dear = vcpu->arch.fault_dear;
548 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 548 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
549 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 549 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
550 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 550 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
551 kvmppc_book3s_queue_irqprio(vcpu, vec); 551 kvmppc_book3s_queue_irqprio(vcpu, vec);
552 } else if (page_found == -EINVAL) { 552 } else if (page_found == -EINVAL) {
553 /* Page not found in guest SLB */ 553 /* Page not found in guest SLB */
554 vcpu->arch.dear = vcpu->arch.fault_dear; 554 vcpu->arch.dear = vcpu->arch.fault_dear;
555 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 555 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
556 } else if (!is_mmio && 556 } else if (!is_mmio &&
557 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { 557 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
558 /* The guest's PTE is not mapped yet. Map on the host */ 558 /* The guest's PTE is not mapped yet. Map on the host */
559 kvmppc_mmu_map_page(vcpu, &pte); 559 kvmppc_mmu_map_page(vcpu, &pte);
560 if (data) 560 if (data)
561 vcpu->stat.sp_storage++; 561 vcpu->stat.sp_storage++;
562 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 562 else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
563 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 563 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
564 kvmppc_patch_dcbz(vcpu, &pte); 564 kvmppc_patch_dcbz(vcpu, &pte);
565 } else { 565 } else {
566 /* MMIO */ 566 /* MMIO */
567 vcpu->stat.mmio_exits++; 567 vcpu->stat.mmio_exits++;
568 vcpu->arch.paddr_accessed = pte.raddr; 568 vcpu->arch.paddr_accessed = pte.raddr;
569 r = kvmppc_emulate_mmio(run, vcpu); 569 r = kvmppc_emulate_mmio(run, vcpu);
570 if ( r == RESUME_HOST_NV ) 570 if ( r == RESUME_HOST_NV )
571 r = RESUME_HOST; 571 r = RESUME_HOST;
572 } 572 }
573 573
574 return r; 574 return r;
575 } 575 }
576 576
577 static inline int get_fpr_index(int i) 577 static inline int get_fpr_index(int i)
578 { 578 {
579 #ifdef CONFIG_VSX 579 #ifdef CONFIG_VSX
580 i *= 2; 580 i *= 2;
581 #endif 581 #endif
582 return i; 582 return i;
583 } 583 }
584 584
585 /* Give up external provider (FPU, Altivec, VSX) */ 585 /* Give up external provider (FPU, Altivec, VSX) */
586 static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 586 static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
587 { 587 {
588 struct thread_struct *t = &current->thread; 588 struct thread_struct *t = &current->thread;
589 u64 *vcpu_fpr = vcpu->arch.fpr; 589 u64 *vcpu_fpr = vcpu->arch.fpr;
590 u64 *vcpu_vsx = vcpu->arch.vsr; 590 u64 *vcpu_vsx = vcpu->arch.vsr;
591 u64 *thread_fpr = (u64*)t->fpr; 591 u64 *thread_fpr = (u64*)t->fpr;
592 int i; 592 int i;
593 593
594 if (!(vcpu->arch.guest_owned_ext & msr)) 594 if (!(vcpu->arch.guest_owned_ext & msr))
595 return; 595 return;
596 596
597 #ifdef DEBUG_EXT 597 #ifdef DEBUG_EXT
598 printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 598 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
599 #endif 599 #endif
600 600
601 switch (msr) { 601 switch (msr) {
602 case MSR_FP: 602 case MSR_FP:
603 giveup_fpu(current); 603 giveup_fpu(current);
604 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 604 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
605 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; 605 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
606 606
607 vcpu->arch.fpscr = t->fpscr.val; 607 vcpu->arch.fpscr = t->fpscr.val;
608 break; 608 break;
609 case MSR_VEC: 609 case MSR_VEC:
610 #ifdef CONFIG_ALTIVEC 610 #ifdef CONFIG_ALTIVEC
611 giveup_altivec(current); 611 giveup_altivec(current);
612 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); 612 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
613 vcpu->arch.vscr = t->vscr; 613 vcpu->arch.vscr = t->vscr;
614 #endif 614 #endif
615 break; 615 break;
616 case MSR_VSX: 616 case MSR_VSX:
617 #ifdef CONFIG_VSX 617 #ifdef CONFIG_VSX
618 __giveup_vsx(current); 618 __giveup_vsx(current);
619 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) 619 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
620 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; 620 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
621 #endif 621 #endif
622 break; 622 break;
623 default: 623 default:
624 BUG(); 624 BUG();
625 } 625 }
626 626
627 vcpu->arch.guest_owned_ext &= ~msr; 627 vcpu->arch.guest_owned_ext &= ~msr;
628 current->thread.regs->msr &= ~msr; 628 current->thread.regs->msr &= ~msr;
629 kvmppc_recalc_shadow_msr(vcpu); 629 kvmppc_recalc_shadow_msr(vcpu);
630 } 630 }
631 631
632 /* Handle external providers (FPU, Altivec, VSX) */ 632 /* Handle external providers (FPU, Altivec, VSX) */
633 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 633 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
634 ulong msr) 634 ulong msr)
635 { 635 {
636 struct thread_struct *t = &current->thread; 636 struct thread_struct *t = &current->thread;
637 u64 *vcpu_fpr = vcpu->arch.fpr; 637 u64 *vcpu_fpr = vcpu->arch.fpr;
638 u64 *vcpu_vsx = vcpu->arch.vsr; 638 u64 *vcpu_vsx = vcpu->arch.vsr;
639 u64 *thread_fpr = (u64*)t->fpr; 639 u64 *thread_fpr = (u64*)t->fpr;
640 int i; 640 int i;
641 641
642 if (!(vcpu->arch.msr & msr)) { 642 if (!(vcpu->arch.msr & msr)) {
643 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 643 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
644 return RESUME_GUEST; 644 return RESUME_GUEST;
645 } 645 }
646 646
647 #ifdef DEBUG_EXT 647 #ifdef DEBUG_EXT
648 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 648 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
649 #endif 649 #endif
650 650
651 current->thread.regs->msr |= msr; 651 current->thread.regs->msr |= msr;
652 652
653 switch (msr) { 653 switch (msr) {
654 case MSR_FP: 654 case MSR_FP:
655 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) 655 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
656 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; 656 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
657 657
658 t->fpscr.val = vcpu->arch.fpscr; 658 t->fpscr.val = vcpu->arch.fpscr;
659 t->fpexc_mode = 0; 659 t->fpexc_mode = 0;
660 kvmppc_load_up_fpu(); 660 kvmppc_load_up_fpu();
661 break; 661 break;
662 case MSR_VEC: 662 case MSR_VEC:
663 #ifdef CONFIG_ALTIVEC 663 #ifdef CONFIG_ALTIVEC
664 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); 664 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
665 t->vscr = vcpu->arch.vscr; 665 t->vscr = vcpu->arch.vscr;
666 t->vrsave = -1; 666 t->vrsave = -1;
667 kvmppc_load_up_altivec(); 667 kvmppc_load_up_altivec();
668 #endif 668 #endif
669 break; 669 break;
670 case MSR_VSX: 670 case MSR_VSX:
671 #ifdef CONFIG_VSX 671 #ifdef CONFIG_VSX
672 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) 672 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
673 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; 673 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
674 kvmppc_load_up_vsx(); 674 kvmppc_load_up_vsx();
675 #endif 675 #endif
676 break; 676 break;
677 default: 677 default:
678 BUG(); 678 BUG();
679 } 679 }
680 680
681 vcpu->arch.guest_owned_ext |= msr; 681 vcpu->arch.guest_owned_ext |= msr;
682 682
683 kvmppc_recalc_shadow_msr(vcpu); 683 kvmppc_recalc_shadow_msr(vcpu);
684 684
685 return RESUME_GUEST; 685 return RESUME_GUEST;
686 } 686 }
687 687
688 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 688 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
689 unsigned int exit_nr) 689 unsigned int exit_nr)
690 { 690 {
691 int r = RESUME_HOST; 691 int r = RESUME_HOST;
692 692
693 vcpu->stat.sum_exits++; 693 vcpu->stat.sum_exits++;
694 694
695 run->exit_reason = KVM_EXIT_UNKNOWN; 695 run->exit_reason = KVM_EXIT_UNKNOWN;
696 run->ready_for_interrupt_injection = 1; 696 run->ready_for_interrupt_injection = 1;
697 #ifdef EXIT_DEBUG 697 #ifdef EXIT_DEBUG
698 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", 698 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
699 exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, 699 exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
700 kvmppc_get_dec(vcpu), vcpu->arch.msr); 700 kvmppc_get_dec(vcpu), vcpu->arch.msr);
701 #elif defined (EXIT_DEBUG_SIMPLE) 701 #elif defined (EXIT_DEBUG_SIMPLE)
702 if ((exit_nr != 0x900) && (exit_nr != 0x500)) 702 if ((exit_nr != 0x900) && (exit_nr != 0x500))
703 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", 703 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
704 exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, 704 exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
705 vcpu->arch.msr); 705 vcpu->arch.msr);
706 #endif 706 #endif
707 kvm_resched(vcpu); 707 kvm_resched(vcpu);
708 switch (exit_nr) { 708 switch (exit_nr) {
709 case BOOK3S_INTERRUPT_INST_STORAGE: 709 case BOOK3S_INTERRUPT_INST_STORAGE:
710 vcpu->stat.pf_instruc++; 710 vcpu->stat.pf_instruc++;
711 /* only care about PTEG not found errors, but leave NX alone */ 711 /* only care about PTEG not found errors, but leave NX alone */
712 if (vcpu->arch.shadow_srr1 & 0x40000000) { 712 if (vcpu->arch.shadow_srr1 & 0x40000000) {
713 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); 713 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
714 vcpu->stat.sp_instruc++; 714 vcpu->stat.sp_instruc++;
715 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 715 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
716 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 716 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
717 /* 717 /*
718 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, 718 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
719 * so we can't use the NX bit inside the guest. Let's cross our fingers, 719 * so we can't use the NX bit inside the guest. Let's cross our fingers,
720 * that no guest that needs the dcbz hack does NX. 720 * that no guest that needs the dcbz hack does NX.
721 */ 721 */
722 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 722 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
723 } else { 723 } else {
724 vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; 724 vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
725 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 725 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
726 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 726 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
727 r = RESUME_GUEST; 727 r = RESUME_GUEST;
728 } 728 }
729 break; 729 break;
730 case BOOK3S_INTERRUPT_DATA_STORAGE: 730 case BOOK3S_INTERRUPT_DATA_STORAGE:
731 vcpu->stat.pf_storage++; 731 vcpu->stat.pf_storage++;
732 /* The only case we need to handle is missing shadow PTEs */ 732 /* The only case we need to handle is missing shadow PTEs */
733 if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { 733 if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) {
734 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); 734 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr);
735 } else { 735 } else {
736 vcpu->arch.dear = vcpu->arch.fault_dear; 736 vcpu->arch.dear = vcpu->arch.fault_dear;
737 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 737 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
738 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 738 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
739 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); 739 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL);
740 r = RESUME_GUEST; 740 r = RESUME_GUEST;
741 } 741 }
742 break; 742 break;
743 case BOOK3S_INTERRUPT_DATA_SEGMENT: 743 case BOOK3S_INTERRUPT_DATA_SEGMENT:
744 if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { 744 if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) {
745 vcpu->arch.dear = vcpu->arch.fault_dear; 745 vcpu->arch.dear = vcpu->arch.fault_dear;
746 kvmppc_book3s_queue_irqprio(vcpu, 746 kvmppc_book3s_queue_irqprio(vcpu,
747 BOOK3S_INTERRUPT_DATA_SEGMENT); 747 BOOK3S_INTERRUPT_DATA_SEGMENT);
748 } 748 }
749 r = RESUME_GUEST; 749 r = RESUME_GUEST;
750 break; 750 break;
751 case BOOK3S_INTERRUPT_INST_SEGMENT: 751 case BOOK3S_INTERRUPT_INST_SEGMENT:
752 if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { 752 if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) {
753 kvmppc_book3s_queue_irqprio(vcpu, 753 kvmppc_book3s_queue_irqprio(vcpu,
754 BOOK3S_INTERRUPT_INST_SEGMENT); 754 BOOK3S_INTERRUPT_INST_SEGMENT);
755 } 755 }
756 r = RESUME_GUEST; 756 r = RESUME_GUEST;
757 break; 757 break;
758 /* We're good on these - the host merely wanted to get our attention */ 758 /* We're good on these - the host merely wanted to get our attention */
759 case BOOK3S_INTERRUPT_DECREMENTER: 759 case BOOK3S_INTERRUPT_DECREMENTER:
760 vcpu->stat.dec_exits++; 760 vcpu->stat.dec_exits++;
761 r = RESUME_GUEST; 761 r = RESUME_GUEST;
762 break; 762 break;
763 case BOOK3S_INTERRUPT_EXTERNAL: 763 case BOOK3S_INTERRUPT_EXTERNAL:
764 vcpu->stat.ext_intr_exits++; 764 vcpu->stat.ext_intr_exits++;
765 r = RESUME_GUEST; 765 r = RESUME_GUEST;
766 break; 766 break;
767 case BOOK3S_INTERRUPT_PROGRAM: 767 case BOOK3S_INTERRUPT_PROGRAM:
768 { 768 {
769 enum emulation_result er; 769 enum emulation_result er;
770 ulong flags; 770 ulong flags;
771 771
772 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 772 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
773 773
774 if (vcpu->arch.msr & MSR_PR) { 774 if (vcpu->arch.msr & MSR_PR) {
775 #ifdef EXIT_DEBUG 775 #ifdef EXIT_DEBUG
776 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); 776 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst);
777 #endif 777 #endif
778 if ((vcpu->arch.last_inst & 0xff0007ff) != 778 if ((vcpu->arch.last_inst & 0xff0007ff) !=
779 (INS_DCBZ & 0xfffffff7)) { 779 (INS_DCBZ & 0xfffffff7)) {
780 kvmppc_core_queue_program(vcpu, flags); 780 kvmppc_core_queue_program(vcpu, flags);
781 r = RESUME_GUEST; 781 r = RESUME_GUEST;
782 break; 782 break;
783 } 783 }
784 } 784 }
785 785
786 vcpu->stat.emulated_inst_exits++; 786 vcpu->stat.emulated_inst_exits++;
787 er = kvmppc_emulate_instruction(run, vcpu); 787 er = kvmppc_emulate_instruction(run, vcpu);
788 switch (er) { 788 switch (er) {
789 case EMULATE_DONE: 789 case EMULATE_DONE:
790 r = RESUME_GUEST_NV; 790 r = RESUME_GUEST_NV;
791 break; 791 break;
792 case EMULATE_FAIL: 792 case EMULATE_FAIL:
793 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 793 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
794 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 794 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
795 kvmppc_core_queue_program(vcpu, flags); 795 kvmppc_core_queue_program(vcpu, flags);
796 r = RESUME_GUEST; 796 r = RESUME_GUEST;
797 break; 797 break;
798 default: 798 default:
799 BUG(); 799 BUG();
800 } 800 }
801 break; 801 break;
802 } 802 }
803 case BOOK3S_INTERRUPT_SYSCALL: 803 case BOOK3S_INTERRUPT_SYSCALL:
804 #ifdef EXIT_DEBUG 804 #ifdef EXIT_DEBUG
805 printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); 805 printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
806 #endif 806 #endif
807 vcpu->stat.syscall_exits++; 807 vcpu->stat.syscall_exits++;
808 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 808 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
809 r = RESUME_GUEST; 809 r = RESUME_GUEST;
810 break; 810 break;
811 case BOOK3S_INTERRUPT_FP_UNAVAIL: 811 case BOOK3S_INTERRUPT_FP_UNAVAIL:
812 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); 812 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
813 break; 813 break;
814 case BOOK3S_INTERRUPT_ALTIVEC: 814 case BOOK3S_INTERRUPT_ALTIVEC:
815 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); 815 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
816 break; 816 break;
817 case BOOK3S_INTERRUPT_VSX: 817 case BOOK3S_INTERRUPT_VSX:
818 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); 818 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
819 break; 819 break;
820 case BOOK3S_INTERRUPT_MACHINE_CHECK: 820 case BOOK3S_INTERRUPT_MACHINE_CHECK:
821 case BOOK3S_INTERRUPT_TRACE: 821 case BOOK3S_INTERRUPT_TRACE:
822 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 822 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
823 r = RESUME_GUEST; 823 r = RESUME_GUEST;
824 break; 824 break;
825 default: 825 default:
826 /* Ugh - bork here! What did we get? */ 826 /* Ugh - bork here! What did we get? */
827 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 827 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
828 exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); 828 exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
829 r = RESUME_HOST; 829 r = RESUME_HOST;
830 BUG(); 830 BUG();
831 break; 831 break;
832 } 832 }
833 833
834 834
835 if (!(r & RESUME_HOST)) { 835 if (!(r & RESUME_HOST)) {
836 /* To avoid clobbering exit_reason, only check for signals if 836 /* To avoid clobbering exit_reason, only check for signals if
837 * we aren't already exiting to userspace for some other 837 * we aren't already exiting to userspace for some other
838 * reason. */ 838 * reason. */
839 if (signal_pending(current)) { 839 if (signal_pending(current)) {
840 #ifdef EXIT_DEBUG 840 #ifdef EXIT_DEBUG
841 printk(KERN_EMERG "KVM: Going back to host\n"); 841 printk(KERN_EMERG "KVM: Going back to host\n");
842 #endif 842 #endif
843 vcpu->stat.signal_exits++; 843 vcpu->stat.signal_exits++;
844 run->exit_reason = KVM_EXIT_INTR; 844 run->exit_reason = KVM_EXIT_INTR;
845 r = -EINTR; 845 r = -EINTR;
846 } else { 846 } else {
847 /* In case an interrupt came in that was triggered 847 /* In case an interrupt came in that was triggered
848 * from userspace (like DEC), we need to check what 848 * from userspace (like DEC), we need to check what
849 * to inject now! */ 849 * to inject now! */
850 kvmppc_core_deliver_interrupts(vcpu); 850 kvmppc_core_deliver_interrupts(vcpu);
851 } 851 }
852 } 852 }
853 853
854 #ifdef EXIT_DEBUG 854 #ifdef EXIT_DEBUG
855 printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); 855 printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r);
856 #endif 856 #endif
857 857
858 return r; 858 return r;
859 } 859 }
860 860
861 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 861 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
862 { 862 {
863 return 0; 863 return 0;
864 } 864 }
865 865
866 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 866 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
867 { 867 {
868 int i; 868 int i;
869 869
870 regs->pc = vcpu->arch.pc; 870 regs->pc = vcpu->arch.pc;
871 regs->cr = kvmppc_get_cr(vcpu); 871 regs->cr = kvmppc_get_cr(vcpu);
872 regs->ctr = vcpu->arch.ctr; 872 regs->ctr = vcpu->arch.ctr;
873 regs->lr = vcpu->arch.lr; 873 regs->lr = vcpu->arch.lr;
874 regs->xer = kvmppc_get_xer(vcpu); 874 regs->xer = kvmppc_get_xer(vcpu);
875 regs->msr = vcpu->arch.msr; 875 regs->msr = vcpu->arch.msr;
876 regs->srr0 = vcpu->arch.srr0; 876 regs->srr0 = vcpu->arch.srr0;
877 regs->srr1 = vcpu->arch.srr1; 877 regs->srr1 = vcpu->arch.srr1;
878 regs->pid = vcpu->arch.pid; 878 regs->pid = vcpu->arch.pid;
879 regs->sprg0 = vcpu->arch.sprg0; 879 regs->sprg0 = vcpu->arch.sprg0;
880 regs->sprg1 = vcpu->arch.sprg1; 880 regs->sprg1 = vcpu->arch.sprg1;
881 regs->sprg2 = vcpu->arch.sprg2; 881 regs->sprg2 = vcpu->arch.sprg2;
882 regs->sprg3 = vcpu->arch.sprg3; 882 regs->sprg3 = vcpu->arch.sprg3;
883 regs->sprg5 = vcpu->arch.sprg4; 883 regs->sprg5 = vcpu->arch.sprg4;
884 regs->sprg6 = vcpu->arch.sprg5; 884 regs->sprg6 = vcpu->arch.sprg5;
885 regs->sprg7 = vcpu->arch.sprg6; 885 regs->sprg7 = vcpu->arch.sprg6;
886 886
887 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 887 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
888 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 888 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
889 889
890 return 0; 890 return 0;
891 } 891 }
892 892
893 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 893 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
894 { 894 {
895 int i; 895 int i;
896 896
897 vcpu->arch.pc = regs->pc; 897 vcpu->arch.pc = regs->pc;
898 kvmppc_set_cr(vcpu, regs->cr); 898 kvmppc_set_cr(vcpu, regs->cr);
899 vcpu->arch.ctr = regs->ctr; 899 vcpu->arch.ctr = regs->ctr;
900 vcpu->arch.lr = regs->lr; 900 vcpu->arch.lr = regs->lr;
901 kvmppc_set_xer(vcpu, regs->xer); 901 kvmppc_set_xer(vcpu, regs->xer);
902 kvmppc_set_msr(vcpu, regs->msr); 902 kvmppc_set_msr(vcpu, regs->msr);
903 vcpu->arch.srr0 = regs->srr0; 903 vcpu->arch.srr0 = regs->srr0;
904 vcpu->arch.srr1 = regs->srr1; 904 vcpu->arch.srr1 = regs->srr1;
905 vcpu->arch.sprg0 = regs->sprg0; 905 vcpu->arch.sprg0 = regs->sprg0;
906 vcpu->arch.sprg1 = regs->sprg1; 906 vcpu->arch.sprg1 = regs->sprg1;
907 vcpu->arch.sprg2 = regs->sprg2; 907 vcpu->arch.sprg2 = regs->sprg2;
908 vcpu->arch.sprg3 = regs->sprg3; 908 vcpu->arch.sprg3 = regs->sprg3;
909 vcpu->arch.sprg5 = regs->sprg4; 909 vcpu->arch.sprg5 = regs->sprg4;
910 vcpu->arch.sprg6 = regs->sprg5; 910 vcpu->arch.sprg6 = regs->sprg5;
911 vcpu->arch.sprg7 = regs->sprg6; 911 vcpu->arch.sprg7 = regs->sprg6;
912 912
913 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 913 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
914 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 914 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
915 915
916 return 0; 916 return 0;
917 } 917 }
918 918
919 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 919 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
920 struct kvm_sregs *sregs) 920 struct kvm_sregs *sregs)
921 { 921 {
922 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 922 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
923 int i; 923 int i;
924 924
925 sregs->pvr = vcpu->arch.pvr; 925 sregs->pvr = vcpu->arch.pvr;
926 926
927 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; 927 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
928 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 928 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
929 for (i = 0; i < 64; i++) { 929 for (i = 0; i < 64; i++) {
930 sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i; 930 sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
931 sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv; 931 sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
932 } 932 }
933 } else { 933 } else {
934 for (i = 0; i < 16; i++) { 934 for (i = 0; i < 16; i++) {
935 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; 935 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
936 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; 936 sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
937 } 937 }
938 for (i = 0; i < 8; i++) { 938 for (i = 0; i < 8; i++) {
939 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; 939 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
940 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; 940 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
941 } 941 }
942 } 942 }
943 return 0; 943 return 0;
944 } 944 }
945 945
946 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 946 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
947 struct kvm_sregs *sregs) 947 struct kvm_sregs *sregs)
948 { 948 {
949 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 949 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
950 int i; 950 int i;
951 951
952 kvmppc_set_pvr(vcpu, sregs->pvr); 952 kvmppc_set_pvr(vcpu, sregs->pvr);
953 953
954 vcpu3s->sdr1 = sregs->u.s.sdr1; 954 vcpu3s->sdr1 = sregs->u.s.sdr1;
955 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 955 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
956 for (i = 0; i < 64; i++) { 956 for (i = 0; i < 64; i++) {
957 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, 957 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
958 sregs->u.s.ppc64.slb[i].slbe); 958 sregs->u.s.ppc64.slb[i].slbe);
959 } 959 }
960 } else { 960 } else {
961 for (i = 0; i < 16; i++) { 961 for (i = 0; i < 16; i++) {
962 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); 962 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
963 } 963 }
964 for (i = 0; i < 8; i++) { 964 for (i = 0; i < 8; i++) {
965 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false, 965 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
966 (u32)sregs->u.s.ppc32.ibat[i]); 966 (u32)sregs->u.s.ppc32.ibat[i]);
967 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true, 967 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
968 (u32)(sregs->u.s.ppc32.ibat[i] >> 32)); 968 (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
969 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false, 969 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
970 (u32)sregs->u.s.ppc32.dbat[i]); 970 (u32)sregs->u.s.ppc32.dbat[i]);
971 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true, 971 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
972 (u32)(sregs->u.s.ppc32.dbat[i] >> 32)); 972 (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
973 } 973 }
974 } 974 }
975 975
976 /* Flush the MMU after messing with the segments */ 976 /* Flush the MMU after messing with the segments */
977 kvmppc_mmu_pte_flush(vcpu, 0, 0); 977 kvmppc_mmu_pte_flush(vcpu, 0, 0);
978 return 0; 978 return 0;
979 } 979 }
980 980
981 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 981 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
982 { 982 {
983 return -ENOTSUPP; 983 return -ENOTSUPP;
984 } 984 }
985 985
986 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 986 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
987 { 987 {
988 return -ENOTSUPP; 988 return -ENOTSUPP;
989 } 989 }
990 990
991 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 991 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
992 struct kvm_translation *tr) 992 struct kvm_translation *tr)
993 { 993 {
994 return 0; 994 return 0;
995 } 995 }
996 996
997 /* 997 /*
998 * Get (and clear) the dirty memory log for a memory slot. 998 * Get (and clear) the dirty memory log for a memory slot.
999 */ 999 */
1000 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 1000 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1001 struct kvm_dirty_log *log) 1001 struct kvm_dirty_log *log)
1002 { 1002 {
1003 struct kvm_memory_slot *memslot; 1003 struct kvm_memory_slot *memslot;
1004 struct kvm_vcpu *vcpu; 1004 struct kvm_vcpu *vcpu;
1005 ulong ga, ga_end; 1005 ulong ga, ga_end;
1006 int is_dirty = 0; 1006 int is_dirty = 0;
1007 int r, n; 1007 int r;
1008 unsigned long n;
1008 1009
1009 mutex_lock(&kvm->slots_lock); 1010 mutex_lock(&kvm->slots_lock);
1010 1011
1011 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1012 r = kvm_get_dirty_log(kvm, log, &is_dirty);
1012 if (r) 1013 if (r)
1013 goto out; 1014 goto out;
1014 1015
1015 /* If nothing is dirty, don't bother messing with page tables. */ 1016 /* If nothing is dirty, don't bother messing with page tables. */
1016 if (is_dirty) { 1017 if (is_dirty) {
1017 memslot = &kvm->memslots->memslots[log->slot]; 1018 memslot = &kvm->memslots->memslots[log->slot];
1018 1019
1019 ga = memslot->base_gfn << PAGE_SHIFT; 1020 ga = memslot->base_gfn << PAGE_SHIFT;
1020 ga_end = ga + (memslot->npages << PAGE_SHIFT); 1021 ga_end = ga + (memslot->npages << PAGE_SHIFT);
1021 1022
1022 kvm_for_each_vcpu(n, vcpu, kvm) 1023 kvm_for_each_vcpu(n, vcpu, kvm)
1023 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); 1024 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
1024 1025
1025 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1026 n = kvm_dirty_bitmap_bytes(memslot);
1026 memset(memslot->dirty_bitmap, 0, n); 1027 memset(memslot->dirty_bitmap, 0, n);
1027 } 1028 }
1028 1029
1029 r = 0; 1030 r = 0;
1030 out: 1031 out:
1031 mutex_unlock(&kvm->slots_lock); 1032 mutex_unlock(&kvm->slots_lock);
1032 return r; 1033 return r;
1033 } 1034 }
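This hunk is where the overflow fix lands for Book3S: the dirty-bitmap byte count is now kept in an unsigned long and comes from the new kvm_dirty_bitmap_bytes() wrapper instead of the open-coded ALIGN(memslot->npages, BITS_PER_LONG) / 8, which was previously truncated through an int for very large memory slots. A minimal sketch of what that wrapper amounts to, assuming the static inline form added elsewhere in this commit (presumably in include/linux/kvm_host.h):

        /* Sketch of the wrapper this commit introduces: return the dirty
         * bitmap size in bytes as an unsigned long, so a huge memory slot
         * cannot overflow an int on the way to memset()/copy_to_user(). */
        static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
        {
                return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
        }

Sharing one definition means the ia64, x86, powerpc and generic dirty-log paths all clear and copy the bitmap with the same, overflow-safe size.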
1034 1035
1035 int kvmppc_core_check_processor_compat(void) 1036 int kvmppc_core_check_processor_compat(void)
1036 { 1037 {
1037 return 0; 1038 return 0;
1038 } 1039 }
1039 1040
1040 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) 1041 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1041 { 1042 {
1042 struct kvmppc_vcpu_book3s *vcpu_book3s; 1043 struct kvmppc_vcpu_book3s *vcpu_book3s;
1043 struct kvm_vcpu *vcpu; 1044 struct kvm_vcpu *vcpu;
1044 int err; 1045 int err;
1045 1046
1046 vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, 1047 vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO,
1047 get_order(sizeof(struct kvmppc_vcpu_book3s))); 1048 get_order(sizeof(struct kvmppc_vcpu_book3s)));
1048 if (!vcpu_book3s) { 1049 if (!vcpu_book3s) {
1049 err = -ENOMEM; 1050 err = -ENOMEM;
1050 goto out; 1051 goto out;
1051 } 1052 }
1052 1053
1053 vcpu = &vcpu_book3s->vcpu; 1054 vcpu = &vcpu_book3s->vcpu;
1054 err = kvm_vcpu_init(vcpu, kvm, id); 1055 err = kvm_vcpu_init(vcpu, kvm, id);
1055 if (err) 1056 if (err)
1056 goto free_vcpu; 1057 goto free_vcpu;
1057 1058
1058 vcpu->arch.host_retip = kvm_return_point; 1059 vcpu->arch.host_retip = kvm_return_point;
1059 vcpu->arch.host_msr = mfmsr(); 1060 vcpu->arch.host_msr = mfmsr();
1060 /* default to book3s_64 (970fx) */ 1061 /* default to book3s_64 (970fx) */
1061 vcpu->arch.pvr = 0x3C0301; 1062 vcpu->arch.pvr = 0x3C0301;
1062 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1063 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
1063 vcpu_book3s->slb_nr = 64; 1064 vcpu_book3s->slb_nr = 64;
1064 1065
1065 /* remember where some real-mode handlers are */ 1066 /* remember where some real-mode handlers are */
1066 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1067 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
1067 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1068 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
1068 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 1069 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
1069 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; 1070 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
1070 1071
1071 vcpu->arch.shadow_msr = MSR_USER64; 1072 vcpu->arch.shadow_msr = MSR_USER64;
1072 1073
1073 err = __init_new_context(); 1074 err = __init_new_context();
1074 if (err < 0) 1075 if (err < 0)
1075 goto free_vcpu; 1076 goto free_vcpu;
1076 vcpu_book3s->context_id = err; 1077 vcpu_book3s->context_id = err;
1077 1078
1078 vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1; 1079 vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1;
1079 vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS; 1080 vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS;
1080 vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; 1081 vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
1081 1082
1082 return vcpu; 1083 return vcpu;
1083 1084
1084 free_vcpu: 1085 free_vcpu:
1085 free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); 1086 free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
1086 out: 1087 out:
1087 return ERR_PTR(err); 1088 return ERR_PTR(err);
1088 } 1089 }
1089 1090
1090 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) 1091 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
1091 { 1092 {
1092 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1093 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
1093 1094
1094 __destroy_context(vcpu_book3s->context_id); 1095 __destroy_context(vcpu_book3s->context_id);
1095 kvm_vcpu_uninit(vcpu); 1096 kvm_vcpu_uninit(vcpu);
1096 free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); 1097 free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
1097 } 1098 }
1098 1099
1099 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 1100 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
1100 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1101 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1101 { 1102 {
1102 int ret; 1103 int ret;
1103 struct thread_struct ext_bkp; 1104 struct thread_struct ext_bkp;
1104 bool save_vec = current->thread.used_vr; 1105 bool save_vec = current->thread.used_vr;
1105 bool save_vsx = current->thread.used_vsr; 1106 bool save_vsx = current->thread.used_vsr;
1106 ulong ext_msr; 1107 ulong ext_msr;
1107 1108
1108 /* No need to go into the guest when all we do is going out */ 1109 /* No need to go into the guest when all we do is going out */
1109 if (signal_pending(current)) { 1110 if (signal_pending(current)) {
1110 kvm_run->exit_reason = KVM_EXIT_INTR; 1111 kvm_run->exit_reason = KVM_EXIT_INTR;
1111 return -EINTR; 1112 return -EINTR;
1112 } 1113 }
1113 1114
1114 /* Save FPU state in stack */ 1115 /* Save FPU state in stack */
1115 if (current->thread.regs->msr & MSR_FP) 1116 if (current->thread.regs->msr & MSR_FP)
1116 giveup_fpu(current); 1117 giveup_fpu(current);
1117 memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); 1118 memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
1118 ext_bkp.fpscr = current->thread.fpscr; 1119 ext_bkp.fpscr = current->thread.fpscr;
1119 ext_bkp.fpexc_mode = current->thread.fpexc_mode; 1120 ext_bkp.fpexc_mode = current->thread.fpexc_mode;
1120 1121
1121 #ifdef CONFIG_ALTIVEC 1122 #ifdef CONFIG_ALTIVEC
1122 /* Save Altivec state in stack */ 1123 /* Save Altivec state in stack */
1123 if (save_vec) { 1124 if (save_vec) {
1124 if (current->thread.regs->msr & MSR_VEC) 1125 if (current->thread.regs->msr & MSR_VEC)
1125 giveup_altivec(current); 1126 giveup_altivec(current);
1126 memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); 1127 memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
1127 ext_bkp.vscr = current->thread.vscr; 1128 ext_bkp.vscr = current->thread.vscr;
1128 ext_bkp.vrsave = current->thread.vrsave; 1129 ext_bkp.vrsave = current->thread.vrsave;
1129 } 1130 }
1130 ext_bkp.used_vr = current->thread.used_vr; 1131 ext_bkp.used_vr = current->thread.used_vr;
1131 #endif 1132 #endif
1132 1133
1133 #ifdef CONFIG_VSX 1134 #ifdef CONFIG_VSX
1134 /* Save VSX state in stack */ 1135 /* Save VSX state in stack */
1135 if (save_vsx && (current->thread.regs->msr & MSR_VSX)) 1136 if (save_vsx && (current->thread.regs->msr & MSR_VSX))
1136 __giveup_vsx(current); 1137 __giveup_vsx(current);
1137 ext_bkp.used_vsr = current->thread.used_vsr; 1138 ext_bkp.used_vsr = current->thread.used_vsr;
1138 #endif 1139 #endif
1139 1140
1140 /* Remember the MSR with disabled extensions */ 1141 /* Remember the MSR with disabled extensions */
1141 ext_msr = current->thread.regs->msr; 1142 ext_msr = current->thread.regs->msr;
1142 1143
1143 /* XXX we get called with irq disabled - change that! */ 1144 /* XXX we get called with irq disabled - change that! */
1144 local_irq_enable(); 1145 local_irq_enable();
1145 1146
1146 ret = __kvmppc_vcpu_entry(kvm_run, vcpu); 1147 ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
1147 1148
1148 local_irq_disable(); 1149 local_irq_disable();
1149 1150
1150 current->thread.regs->msr = ext_msr; 1151 current->thread.regs->msr = ext_msr;
1151 1152
1152 /* Make sure we save the guest FPU/Altivec/VSX state */ 1153 /* Make sure we save the guest FPU/Altivec/VSX state */
1153 kvmppc_giveup_ext(vcpu, MSR_FP); 1154 kvmppc_giveup_ext(vcpu, MSR_FP);
1154 kvmppc_giveup_ext(vcpu, MSR_VEC); 1155 kvmppc_giveup_ext(vcpu, MSR_VEC);
1155 kvmppc_giveup_ext(vcpu, MSR_VSX); 1156 kvmppc_giveup_ext(vcpu, MSR_VSX);
1156 1157
1157 /* Restore FPU state from stack */ 1158 /* Restore FPU state from stack */
1158 memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); 1159 memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
1159 current->thread.fpscr = ext_bkp.fpscr; 1160 current->thread.fpscr = ext_bkp.fpscr;
1160 current->thread.fpexc_mode = ext_bkp.fpexc_mode; 1161 current->thread.fpexc_mode = ext_bkp.fpexc_mode;
1161 1162
1162 #ifdef CONFIG_ALTIVEC 1163 #ifdef CONFIG_ALTIVEC
1163 /* Restore Altivec state from stack */ 1164 /* Restore Altivec state from stack */
1164 if (save_vec && current->thread.used_vr) { 1165 if (save_vec && current->thread.used_vr) {
1165 memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); 1166 memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
1166 current->thread.vscr = ext_bkp.vscr; 1167 current->thread.vscr = ext_bkp.vscr;
1167 current->thread.vrsave= ext_bkp.vrsave; 1168 current->thread.vrsave= ext_bkp.vrsave;
1168 } 1169 }
1169 current->thread.used_vr = ext_bkp.used_vr; 1170 current->thread.used_vr = ext_bkp.used_vr;
1170 #endif 1171 #endif
1171 1172
1172 #ifdef CONFIG_VSX 1173 #ifdef CONFIG_VSX
1173 current->thread.used_vsr = ext_bkp.used_vsr; 1174 current->thread.used_vsr = ext_bkp.used_vsr;
1174 #endif 1175 #endif
1175 1176
1176 return ret; 1177 return ret;
1177 } 1178 }
1178 1179
1179 static int kvmppc_book3s_init(void) 1180 static int kvmppc_book3s_init(void)
1180 { 1181 {
1181 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE); 1182 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE);
1182 } 1183 }
1183 1184
1184 static void kvmppc_book3s_exit(void) 1185 static void kvmppc_book3s_exit(void)
1185 { 1186 {
1186 kvm_exit(); 1187 kvm_exit();
1187 } 1188 }
1188 1189
1189 module_init(kvmppc_book3s_init); 1190 module_init(kvmppc_book3s_init);
1190 module_exit(kvmppc_book3s_exit); 1191 module_exit(kvmppc_book3s_exit);
1191 1192
1 /* 1 /*
2 * Kernel-based Virtual Machine driver for Linux 2 * Kernel-based Virtual Machine driver for Linux
3 * 3 *
4 * derived from drivers/kvm/kvm_main.c 4 * derived from drivers/kvm/kvm_main.c
5 * 5 *
6 * Copyright (C) 2006 Qumranet, Inc. 6 * Copyright (C) 2006 Qumranet, Inc.
7 * Copyright (C) 2008 Qumranet, Inc. 7 * Copyright (C) 2008 Qumranet, Inc.
8 * Copyright IBM Corporation, 2008 8 * Copyright IBM Corporation, 2008
9 * 9 *
10 * Authors: 10 * Authors:
11 * Avi Kivity <avi@qumranet.com> 11 * Avi Kivity <avi@qumranet.com>
12 * Yaniv Kamay <yaniv@qumranet.com> 12 * Yaniv Kamay <yaniv@qumranet.com>
13 * Amit Shah <amit.shah@qumranet.com> 13 * Amit Shah <amit.shah@qumranet.com>
14 * Ben-Ami Yassour <benami@il.ibm.com> 14 * Ben-Ami Yassour <benami@il.ibm.com>
15 * 15 *
16 * This work is licensed under the terms of the GNU GPL, version 2. See 16 * This work is licensed under the terms of the GNU GPL, version 2. See
17 * the COPYING file in the top-level directory. 17 * the COPYING file in the top-level directory.
18 * 18 *
19 */ 19 */
20 20
21 #include <linux/kvm_host.h> 21 #include <linux/kvm_host.h>
22 #include "irq.h" 22 #include "irq.h"
23 #include "mmu.h" 23 #include "mmu.h"
24 #include "i8254.h" 24 #include "i8254.h"
25 #include "tss.h" 25 #include "tss.h"
26 #include "kvm_cache_regs.h" 26 #include "kvm_cache_regs.h"
27 #include "x86.h" 27 #include "x86.h"
28 28
29 #include <linux/clocksource.h> 29 #include <linux/clocksource.h>
30 #include <linux/interrupt.h> 30 #include <linux/interrupt.h>
31 #include <linux/kvm.h> 31 #include <linux/kvm.h>
32 #include <linux/fs.h> 32 #include <linux/fs.h>
33 #include <linux/vmalloc.h> 33 #include <linux/vmalloc.h>
34 #include <linux/module.h> 34 #include <linux/module.h>
35 #include <linux/mman.h> 35 #include <linux/mman.h>
36 #include <linux/highmem.h> 36 #include <linux/highmem.h>
37 #include <linux/iommu.h> 37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h> 38 #include <linux/intel-iommu.h>
39 #include <linux/cpufreq.h> 39 #include <linux/cpufreq.h>
40 #include <linux/user-return-notifier.h> 40 #include <linux/user-return-notifier.h>
41 #include <linux/srcu.h> 41 #include <linux/srcu.h>
42 #include <linux/slab.h> 42 #include <linux/slab.h>
43 #include <trace/events/kvm.h> 43 #include <trace/events/kvm.h>
44 #undef TRACE_INCLUDE_FILE 44 #undef TRACE_INCLUDE_FILE
45 #define CREATE_TRACE_POINTS 45 #define CREATE_TRACE_POINTS
46 #include "trace.h" 46 #include "trace.h"
47 47
48 #include <asm/debugreg.h> 48 #include <asm/debugreg.h>
49 #include <asm/uaccess.h> 49 #include <asm/uaccess.h>
50 #include <asm/msr.h> 50 #include <asm/msr.h>
51 #include <asm/desc.h> 51 #include <asm/desc.h>
52 #include <asm/mtrr.h> 52 #include <asm/mtrr.h>
53 #include <asm/mce.h> 53 #include <asm/mce.h>
54 54
55 #define MAX_IO_MSRS 256 55 #define MAX_IO_MSRS 256
56 #define CR0_RESERVED_BITS \ 56 #define CR0_RESERVED_BITS \
57 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ 57 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
58 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ 58 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
59 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) 59 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
60 #define CR4_RESERVED_BITS \ 60 #define CR4_RESERVED_BITS \
61 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ 61 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
62 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ 62 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
63 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ 63 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
64 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) 64 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
65 65
66 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) 66 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
67 67
68 #define KVM_MAX_MCE_BANKS 32 68 #define KVM_MAX_MCE_BANKS 32
69 #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P 69 #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
70 70
71 /* EFER defaults: 71 /* EFER defaults:
72 * - enable syscall by default because it is emulated by KVM 72 * - enable syscall by default because it is emulated by KVM
73 * - enable LME and LMA per default on 64 bit KVM 73 * - enable LME and LMA per default on 64 bit KVM
74 */ 74 */
75 #ifdef CONFIG_X86_64 75 #ifdef CONFIG_X86_64
76 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; 76 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
77 #else 77 #else
78 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; 78 static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
79 #endif 79 #endif
80 80
81 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM 81 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
82 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 82 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
83 83
84 static void update_cr8_intercept(struct kvm_vcpu *vcpu); 84 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
85 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 85 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
86 struct kvm_cpuid_entry2 __user *entries); 86 struct kvm_cpuid_entry2 __user *entries);
87 87
88 struct kvm_x86_ops *kvm_x86_ops; 88 struct kvm_x86_ops *kvm_x86_ops;
89 EXPORT_SYMBOL_GPL(kvm_x86_ops); 89 EXPORT_SYMBOL_GPL(kvm_x86_ops);
90 90
91 int ignore_msrs = 0; 91 int ignore_msrs = 0;
92 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); 92 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
93 93
94 #define KVM_NR_SHARED_MSRS 16 94 #define KVM_NR_SHARED_MSRS 16
95 95
96 struct kvm_shared_msrs_global { 96 struct kvm_shared_msrs_global {
97 int nr; 97 int nr;
98 u32 msrs[KVM_NR_SHARED_MSRS]; 98 u32 msrs[KVM_NR_SHARED_MSRS];
99 }; 99 };
100 100
101 struct kvm_shared_msrs { 101 struct kvm_shared_msrs {
102 struct user_return_notifier urn; 102 struct user_return_notifier urn;
103 bool registered; 103 bool registered;
104 struct kvm_shared_msr_values { 104 struct kvm_shared_msr_values {
105 u64 host; 105 u64 host;
106 u64 curr; 106 u64 curr;
107 } values[KVM_NR_SHARED_MSRS]; 107 } values[KVM_NR_SHARED_MSRS];
108 }; 108 };
109 109
110 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; 110 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
111 static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); 111 static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
112 112
113 struct kvm_stats_debugfs_item debugfs_entries[] = { 113 struct kvm_stats_debugfs_item debugfs_entries[] = {
114 { "pf_fixed", VCPU_STAT(pf_fixed) }, 114 { "pf_fixed", VCPU_STAT(pf_fixed) },
115 { "pf_guest", VCPU_STAT(pf_guest) }, 115 { "pf_guest", VCPU_STAT(pf_guest) },
116 { "tlb_flush", VCPU_STAT(tlb_flush) }, 116 { "tlb_flush", VCPU_STAT(tlb_flush) },
117 { "invlpg", VCPU_STAT(invlpg) }, 117 { "invlpg", VCPU_STAT(invlpg) },
118 { "exits", VCPU_STAT(exits) }, 118 { "exits", VCPU_STAT(exits) },
119 { "io_exits", VCPU_STAT(io_exits) }, 119 { "io_exits", VCPU_STAT(io_exits) },
120 { "mmio_exits", VCPU_STAT(mmio_exits) }, 120 { "mmio_exits", VCPU_STAT(mmio_exits) },
121 { "signal_exits", VCPU_STAT(signal_exits) }, 121 { "signal_exits", VCPU_STAT(signal_exits) },
122 { "irq_window", VCPU_STAT(irq_window_exits) }, 122 { "irq_window", VCPU_STAT(irq_window_exits) },
123 { "nmi_window", VCPU_STAT(nmi_window_exits) }, 123 { "nmi_window", VCPU_STAT(nmi_window_exits) },
124 { "halt_exits", VCPU_STAT(halt_exits) }, 124 { "halt_exits", VCPU_STAT(halt_exits) },
125 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 125 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
126 { "hypercalls", VCPU_STAT(hypercalls) }, 126 { "hypercalls", VCPU_STAT(hypercalls) },
127 { "request_irq", VCPU_STAT(request_irq_exits) }, 127 { "request_irq", VCPU_STAT(request_irq_exits) },
128 { "irq_exits", VCPU_STAT(irq_exits) }, 128 { "irq_exits", VCPU_STAT(irq_exits) },
129 { "host_state_reload", VCPU_STAT(host_state_reload) }, 129 { "host_state_reload", VCPU_STAT(host_state_reload) },
130 { "efer_reload", VCPU_STAT(efer_reload) }, 130 { "efer_reload", VCPU_STAT(efer_reload) },
131 { "fpu_reload", VCPU_STAT(fpu_reload) }, 131 { "fpu_reload", VCPU_STAT(fpu_reload) },
132 { "insn_emulation", VCPU_STAT(insn_emulation) }, 132 { "insn_emulation", VCPU_STAT(insn_emulation) },
133 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, 133 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
134 { "irq_injections", VCPU_STAT(irq_injections) }, 134 { "irq_injections", VCPU_STAT(irq_injections) },
135 { "nmi_injections", VCPU_STAT(nmi_injections) }, 135 { "nmi_injections", VCPU_STAT(nmi_injections) },
136 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, 136 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
137 { "mmu_pte_write", VM_STAT(mmu_pte_write) }, 137 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
138 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, 138 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
139 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, 139 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
140 { "mmu_flooded", VM_STAT(mmu_flooded) }, 140 { "mmu_flooded", VM_STAT(mmu_flooded) },
141 { "mmu_recycled", VM_STAT(mmu_recycled) }, 141 { "mmu_recycled", VM_STAT(mmu_recycled) },
142 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 142 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
143 { "mmu_unsync", VM_STAT(mmu_unsync) }, 143 { "mmu_unsync", VM_STAT(mmu_unsync) },
144 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 144 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
145 { "largepages", VM_STAT(lpages) }, 145 { "largepages", VM_STAT(lpages) },
146 { NULL } 146 { NULL }
147 }; 147 };
148 148
149 static void kvm_on_user_return(struct user_return_notifier *urn) 149 static void kvm_on_user_return(struct user_return_notifier *urn)
150 { 150 {
151 unsigned slot; 151 unsigned slot;
152 struct kvm_shared_msrs *locals 152 struct kvm_shared_msrs *locals
153 = container_of(urn, struct kvm_shared_msrs, urn); 153 = container_of(urn, struct kvm_shared_msrs, urn);
154 struct kvm_shared_msr_values *values; 154 struct kvm_shared_msr_values *values;
155 155
156 for (slot = 0; slot < shared_msrs_global.nr; ++slot) { 156 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
157 values = &locals->values[slot]; 157 values = &locals->values[slot];
158 if (values->host != values->curr) { 158 if (values->host != values->curr) {
159 wrmsrl(shared_msrs_global.msrs[slot], values->host); 159 wrmsrl(shared_msrs_global.msrs[slot], values->host);
160 values->curr = values->host; 160 values->curr = values->host;
161 } 161 }
162 } 162 }
163 locals->registered = false; 163 locals->registered = false;
164 user_return_notifier_unregister(urn); 164 user_return_notifier_unregister(urn);
165 } 165 }
166 166
167 static void shared_msr_update(unsigned slot, u32 msr) 167 static void shared_msr_update(unsigned slot, u32 msr)
168 { 168 {
169 struct kvm_shared_msrs *smsr; 169 struct kvm_shared_msrs *smsr;
170 u64 value; 170 u64 value;
171 171
172 smsr = &__get_cpu_var(shared_msrs); 172 smsr = &__get_cpu_var(shared_msrs);
173 /* only read, and nobody should modify it at this time, 173 /* only read, and nobody should modify it at this time,
174 * so we don't need a lock */ 174 * so we don't need a lock */
175 if (slot >= shared_msrs_global.nr) { 175 if (slot >= shared_msrs_global.nr) {
176 printk(KERN_ERR "kvm: invalid MSR slot!"); 176 printk(KERN_ERR "kvm: invalid MSR slot!");
177 return; 177 return;
178 } 178 }
179 rdmsrl_safe(msr, &value); 179 rdmsrl_safe(msr, &value);
180 smsr->values[slot].host = value; 180 smsr->values[slot].host = value;
181 smsr->values[slot].curr = value; 181 smsr->values[slot].curr = value;
182 } 182 }
183 183
184 void kvm_define_shared_msr(unsigned slot, u32 msr) 184 void kvm_define_shared_msr(unsigned slot, u32 msr)
185 { 185 {
186 if (slot >= shared_msrs_global.nr) 186 if (slot >= shared_msrs_global.nr)
187 shared_msrs_global.nr = slot + 1; 187 shared_msrs_global.nr = slot + 1;
188 shared_msrs_global.msrs[slot] = msr; 188 shared_msrs_global.msrs[slot] = msr;
189 /* we need to ensure that shared_msrs_global has been updated */ 189 /* we need to ensure that shared_msrs_global has been updated */
190 smp_wmb(); 190 smp_wmb();
191 } 191 }
192 EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 192 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
193 193
194 static void kvm_shared_msr_cpu_online(void) 194 static void kvm_shared_msr_cpu_online(void)
195 { 195 {
196 unsigned i; 196 unsigned i;
197 197
198 for (i = 0; i < shared_msrs_global.nr; ++i) 198 for (i = 0; i < shared_msrs_global.nr; ++i)
199 shared_msr_update(i, shared_msrs_global.msrs[i]); 199 shared_msr_update(i, shared_msrs_global.msrs[i]);
200 } 200 }
201 201
202 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 202 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
203 { 203 {
204 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 204 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
205 205
206 if (((value ^ smsr->values[slot].curr) & mask) == 0) 206 if (((value ^ smsr->values[slot].curr) & mask) == 0)
207 return; 207 return;
208 smsr->values[slot].curr = value; 208 smsr->values[slot].curr = value;
209 wrmsrl(shared_msrs_global.msrs[slot], value); 209 wrmsrl(shared_msrs_global.msrs[slot], value);
210 if (!smsr->registered) { 210 if (!smsr->registered) {
211 smsr->urn.on_user_return = kvm_on_user_return; 211 smsr->urn.on_user_return = kvm_on_user_return;
212 user_return_notifier_register(&smsr->urn); 212 user_return_notifier_register(&smsr->urn);
213 smsr->registered = true; 213 smsr->registered = true;
214 } 214 }
215 } 215 }
216 EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 216 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
217 217
218 static void drop_user_return_notifiers(void *ignore) 218 static void drop_user_return_notifiers(void *ignore)
219 { 219 {
220 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 220 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
221 221
222 if (smsr->registered) 222 if (smsr->registered)
223 kvm_on_user_return(&smsr->urn); 223 kvm_on_user_return(&smsr->urn);
224 } 224 }
225 225
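The shared-MSR helpers above (kvm_define_shared_msr, kvm_set_shared_msr and the user-return notifier) let a vendor module switch selected MSRs to guest values before entry and defer restoring the host values until the CPU really returns to userspace. A rough sketch of the intended calling pattern, assuming a hypothetical vendor module with two shared MSRs (the slot numbers and the MSR choices are illustrative, not taken from this commit):

/* Hypothetical vendor-module usage of the shared-MSR API above. */
static const u32 example_shared_msrs[] = { MSR_K6_STAR, MSR_LSTAR };
#define EXAMPLE_NR_SHARED ARRAY_SIZE(example_shared_msrs)

static void example_hardware_setup(void)
{
	unsigned i;

	/* once at init: publish which MSRs participate in lazy restore */
	for (i = 0; i < EXAMPLE_NR_SHARED; i++)
		kvm_define_shared_msr(i, example_shared_msrs[i]);
}

static void example_prepare_guest_switch(const u64 *guest_values)
{
	unsigned i;

	/* before vmentry: load guest values; the saved host values are
	 * written back lazily by kvm_on_user_return() */
	for (i = 0; i < EXAMPLE_NR_SHARED; i++)
		kvm_set_shared_msr(i, guest_values[i], ~0ULL);
}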
226 unsigned long segment_base(u16 selector) 226 unsigned long segment_base(u16 selector)
227 { 227 {
228 struct descriptor_table gdt; 228 struct descriptor_table gdt;
229 struct desc_struct *d; 229 struct desc_struct *d;
230 unsigned long table_base; 230 unsigned long table_base;
231 unsigned long v; 231 unsigned long v;
232 232
233 if (selector == 0) 233 if (selector == 0)
234 return 0; 234 return 0;
235 235
236 kvm_get_gdt(&gdt); 236 kvm_get_gdt(&gdt);
237 table_base = gdt.base; 237 table_base = gdt.base;
238 238
239 if (selector & 4) { /* from ldt */ 239 if (selector & 4) { /* from ldt */
240 u16 ldt_selector = kvm_read_ldt(); 240 u16 ldt_selector = kvm_read_ldt();
241 241
242 table_base = segment_base(ldt_selector); 242 table_base = segment_base(ldt_selector);
243 } 243 }
244 d = (struct desc_struct *)(table_base + (selector & ~7)); 244 d = (struct desc_struct *)(table_base + (selector & ~7));
245 v = get_desc_base(d); 245 v = get_desc_base(d);
246 #ifdef CONFIG_X86_64 246 #ifdef CONFIG_X86_64
247 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) 247 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
248 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; 248 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
249 #endif 249 #endif
250 return v; 250 return v;
251 } 251 }
252 EXPORT_SYMBOL_GPL(segment_base); 252 EXPORT_SYMBOL_GPL(segment_base);
253 253
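segment_base() above walks the GDT or LDT by hand. Only bits 3-15 of a selector index the table: bit 2 is the table indicator (0 = GDT, 1 = LDT, hence the selector & 4 test) and bits 0-1 are the RPL, so the descriptor sits at table_base + (selector & ~7). A standalone sketch of the decoding, with an arbitrary example selector:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t selector = 0x2b;		/* arbitrary example value */
	unsigned index = selector >> 3;		/* descriptor index */
	unsigned ti    = (selector >> 2) & 1;	/* 0 = GDT, 1 = LDT */
	unsigned rpl   = selector & 3;		/* requested privilege level */

	/* the byte offset used by segment_base(): index * 8 */
	printf("index=%u ti=%u rpl=%u offset=0x%x\n",
	       index, ti, rpl, selector & ~7);
	return 0;
}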
254 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) 254 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
255 { 255 {
256 if (irqchip_in_kernel(vcpu->kvm)) 256 if (irqchip_in_kernel(vcpu->kvm))
257 return vcpu->arch.apic_base; 257 return vcpu->arch.apic_base;
258 else 258 else
259 return vcpu->arch.apic_base; 259 return vcpu->arch.apic_base;
260 } 260 }
261 EXPORT_SYMBOL_GPL(kvm_get_apic_base); 261 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
262 262
263 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) 263 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
264 { 264 {
265 /* TODO: reserve bits check */ 265 /* TODO: reserve bits check */
266 if (irqchip_in_kernel(vcpu->kvm)) 266 if (irqchip_in_kernel(vcpu->kvm))
267 kvm_lapic_set_base(vcpu, data); 267 kvm_lapic_set_base(vcpu, data);
268 else 268 else
269 vcpu->arch.apic_base = data; 269 vcpu->arch.apic_base = data;
270 } 270 }
271 EXPORT_SYMBOL_GPL(kvm_set_apic_base); 271 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
272 272
273 #define EXCPT_BENIGN 0 273 #define EXCPT_BENIGN 0
274 #define EXCPT_CONTRIBUTORY 1 274 #define EXCPT_CONTRIBUTORY 1
275 #define EXCPT_PF 2 275 #define EXCPT_PF 2
276 276
277 static int exception_class(int vector) 277 static int exception_class(int vector)
278 { 278 {
279 switch (vector) { 279 switch (vector) {
280 case PF_VECTOR: 280 case PF_VECTOR:
281 return EXCPT_PF; 281 return EXCPT_PF;
282 case DE_VECTOR: 282 case DE_VECTOR:
283 case TS_VECTOR: 283 case TS_VECTOR:
284 case NP_VECTOR: 284 case NP_VECTOR:
285 case SS_VECTOR: 285 case SS_VECTOR:
286 case GP_VECTOR: 286 case GP_VECTOR:
287 return EXCPT_CONTRIBUTORY; 287 return EXCPT_CONTRIBUTORY;
288 default: 288 default:
289 break; 289 break;
290 } 290 }
291 return EXCPT_BENIGN; 291 return EXCPT_BENIGN;
292 } 292 }
293 293
294 static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 294 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
295 unsigned nr, bool has_error, u32 error_code) 295 unsigned nr, bool has_error, u32 error_code)
296 { 296 {
297 u32 prev_nr; 297 u32 prev_nr;
298 int class1, class2; 298 int class1, class2;
299 299
300 if (!vcpu->arch.exception.pending) { 300 if (!vcpu->arch.exception.pending) {
301 queue: 301 queue:
302 vcpu->arch.exception.pending = true; 302 vcpu->arch.exception.pending = true;
303 vcpu->arch.exception.has_error_code = has_error; 303 vcpu->arch.exception.has_error_code = has_error;
304 vcpu->arch.exception.nr = nr; 304 vcpu->arch.exception.nr = nr;
305 vcpu->arch.exception.error_code = error_code; 305 vcpu->arch.exception.error_code = error_code;
306 return; 306 return;
307 } 307 }
308 308
309 /* to check exception */ 309 /* to check exception */
310 prev_nr = vcpu->arch.exception.nr; 310 prev_nr = vcpu->arch.exception.nr;
311 if (prev_nr == DF_VECTOR) { 311 if (prev_nr == DF_VECTOR) {
312 /* triple fault -> shutdown */ 312 /* triple fault -> shutdown */
313 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 313 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
314 return; 314 return;
315 } 315 }
316 class1 = exception_class(prev_nr); 316 class1 = exception_class(prev_nr);
317 class2 = exception_class(nr); 317 class2 = exception_class(nr);
318 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) 318 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
319 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { 319 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
320 /* generate double fault per SDM Table 5-5 */ 320 /* generate double fault per SDM Table 5-5 */
321 vcpu->arch.exception.pending = true; 321 vcpu->arch.exception.pending = true;
322 vcpu->arch.exception.has_error_code = true; 322 vcpu->arch.exception.has_error_code = true;
323 vcpu->arch.exception.nr = DF_VECTOR; 323 vcpu->arch.exception.nr = DF_VECTOR;
324 vcpu->arch.exception.error_code = 0; 324 vcpu->arch.exception.error_code = 0;
325 } else 325 } else
326 /* replace previous exception with a new one in the hope 326 /* replace previous exception with a new one in the hope
327 that instruction re-execution will regenerate the lost 327 that instruction re-execution will regenerate the lost
328 exception */ 328 exception */
329 goto queue; 329 goto queue;
330 } 330 }
331 331
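kvm_multiple_exception() above applies the exception-merging rules from the SDM: a second contributory exception on top of a pending contributory one, or anything non-benign on top of a pending page fault, is converted into #DF, and a further fault on top of a pending #DF becomes a triple-fault shutdown. A standalone sketch of just that classification for one example pair of vectors (the vector numbers are the architectural ones):

#include <stdio.h>

enum { EXCPT_BENIGN, EXCPT_CONTRIBUTORY, EXCPT_PF };

/* mirrors exception_class() above, with literal vector numbers */
static int exception_class(int vector)
{
	switch (vector) {
	case 14:					/* #PF */
		return EXCPT_PF;
	case 0: case 10: case 11: case 12: case 13:	/* #DE #TS #NP #SS #GP */
		return EXCPT_CONTRIBUTORY;
	}
	return EXCPT_BENIGN;
}

int main(void)
{
	int prev = 14, next = 13;	/* #PF already pending, then a #GP */
	int c1 = exception_class(prev), c2 = exception_class(next);

	if ((c1 == EXCPT_CONTRIBUTORY && c2 == EXCPT_CONTRIBUTORY) ||
	    (c1 == EXCPT_PF && c2 != EXCPT_BENIGN))
		printf("merge into #DF\n");		/* this case */
	else
		printf("queue/replace with vector %d\n", next);
	return 0;
}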
332 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 332 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
333 { 333 {
334 kvm_multiple_exception(vcpu, nr, false, 0); 334 kvm_multiple_exception(vcpu, nr, false, 0);
335 } 335 }
336 EXPORT_SYMBOL_GPL(kvm_queue_exception); 336 EXPORT_SYMBOL_GPL(kvm_queue_exception);
337 337
338 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, 338 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
339 u32 error_code) 339 u32 error_code)
340 { 340 {
341 ++vcpu->stat.pf_guest; 341 ++vcpu->stat.pf_guest;
342 vcpu->arch.cr2 = addr; 342 vcpu->arch.cr2 = addr;
343 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 343 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
344 } 344 }
345 345
346 void kvm_inject_nmi(struct kvm_vcpu *vcpu) 346 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
347 { 347 {
348 vcpu->arch.nmi_pending = 1; 348 vcpu->arch.nmi_pending = 1;
349 } 349 }
350 EXPORT_SYMBOL_GPL(kvm_inject_nmi); 350 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
351 351
352 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 352 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
353 { 353 {
354 kvm_multiple_exception(vcpu, nr, true, error_code); 354 kvm_multiple_exception(vcpu, nr, true, error_code);
355 } 355 }
356 EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 356 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
357 357
358 /* 358 /*
359 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue 359 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
360 * a #GP and return false. 360 * a #GP and return false.
361 */ 361 */
362 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) 362 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
363 { 363 {
364 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl) 364 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
365 return true; 365 return true;
366 kvm_queue_exception_e(vcpu, GP_VECTOR, 0); 366 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
367 return false; 367 return false;
368 } 368 }
369 EXPORT_SYMBOL_GPL(kvm_require_cpl); 369 EXPORT_SYMBOL_GPL(kvm_require_cpl);
370 370
371 /* 371 /*
372 * Load the pae pdptrs. Return true if they are all valid. 372 * Load the pae pdptrs. Return true if they are all valid.
373 */ 373 */
374 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) 374 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
375 { 375 {
376 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; 376 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
377 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; 377 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
378 int i; 378 int i;
379 int ret; 379 int ret;
380 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 380 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
381 381
382 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, 382 ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
383 offset * sizeof(u64), sizeof(pdpte)); 383 offset * sizeof(u64), sizeof(pdpte));
384 if (ret < 0) { 384 if (ret < 0) {
385 ret = 0; 385 ret = 0;
386 goto out; 386 goto out;
387 } 387 }
388 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { 388 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
389 if (is_present_gpte(pdpte[i]) && 389 if (is_present_gpte(pdpte[i]) &&
390 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { 390 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
391 ret = 0; 391 ret = 0;
392 goto out; 392 goto out;
393 } 393 }
394 } 394 }
395 ret = 1; 395 ret = 1;
396 396
397 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); 397 memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
398 __set_bit(VCPU_EXREG_PDPTR, 398 __set_bit(VCPU_EXREG_PDPTR,
399 (unsigned long *)&vcpu->arch.regs_avail); 399 (unsigned long *)&vcpu->arch.regs_avail);
400 __set_bit(VCPU_EXREG_PDPTR, 400 __set_bit(VCPU_EXREG_PDPTR,
401 (unsigned long *)&vcpu->arch.regs_dirty); 401 (unsigned long *)&vcpu->arch.regs_dirty);
402 out: 402 out:
403 403
404 return ret; 404 return ret;
405 } 405 }
406 EXPORT_SYMBOL_GPL(load_pdptrs); 406 EXPORT_SYMBOL_GPL(load_pdptrs);
407 407
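In PAE mode CR3 is 32-byte aligned and points at four 8-byte PDPTEs, which may sit anywhere within a page; load_pdptrs() therefore uses bits 5-11 of CR3 to locate that table inside the page identified by pdpt_gfn and reads all four entries at once. A worked sketch of the offset arithmetic with a made-up CR3 value:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096UL
#define PAGE_SHIFT 12

int main(void)
{
	uint64_t cr3 = 0x12345e0;	/* example: a 32-byte aligned PAE CR3 */
	uint64_t gfn = cr3 >> PAGE_SHIFT;
	/* (cr3 & 4095) >> 5 is the 32-byte group within the page;
	 * << 2 converts that to a u64 index (four u64s per 32 bytes),
	 * matching the ((... >> 5) << 2) expression in load_pdptrs() */
	unsigned offset = ((cr3 & (PAGE_SIZE - 1)) >> 5) << 2;

	printf("gfn=%#llx, byte offset in page=%u, bytes read=%zu\n",
	       (unsigned long long)gfn, offset * 8, 4 * sizeof(uint64_t));
	return 0;
}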
408 static bool pdptrs_changed(struct kvm_vcpu *vcpu) 408 static bool pdptrs_changed(struct kvm_vcpu *vcpu)
409 { 409 {
410 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; 410 u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
411 bool changed = true; 411 bool changed = true;
412 int r; 412 int r;
413 413
414 if (is_long_mode(vcpu) || !is_pae(vcpu)) 414 if (is_long_mode(vcpu) || !is_pae(vcpu))
415 return false; 415 return false;
416 416
417 if (!test_bit(VCPU_EXREG_PDPTR, 417 if (!test_bit(VCPU_EXREG_PDPTR,
418 (unsigned long *)&vcpu->arch.regs_avail)) 418 (unsigned long *)&vcpu->arch.regs_avail))
419 return true; 419 return true;
420 420
421 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); 421 r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
422 if (r < 0) 422 if (r < 0)
423 goto out; 423 goto out;
424 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; 424 changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
425 out: 425 out:
426 426
427 return changed; 427 return changed;
428 } 428 }
429 429
430 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 430 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
431 { 431 {
432 cr0 |= X86_CR0_ET; 432 cr0 |= X86_CR0_ET;
433 433
434 #ifdef CONFIG_X86_64 434 #ifdef CONFIG_X86_64
435 if (cr0 & 0xffffffff00000000UL) { 435 if (cr0 & 0xffffffff00000000UL) {
436 kvm_inject_gp(vcpu, 0); 436 kvm_inject_gp(vcpu, 0);
437 return; 437 return;
438 } 438 }
439 #endif 439 #endif
440 440
441 cr0 &= ~CR0_RESERVED_BITS; 441 cr0 &= ~CR0_RESERVED_BITS;
442 442
443 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 443 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
444 kvm_inject_gp(vcpu, 0); 444 kvm_inject_gp(vcpu, 0);
445 return; 445 return;
446 } 446 }
447 447
448 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 448 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
449 kvm_inject_gp(vcpu, 0); 449 kvm_inject_gp(vcpu, 0);
450 return; 450 return;
451 } 451 }
452 452
453 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 453 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
454 #ifdef CONFIG_X86_64 454 #ifdef CONFIG_X86_64
455 if ((vcpu->arch.efer & EFER_LME)) { 455 if ((vcpu->arch.efer & EFER_LME)) {
456 int cs_db, cs_l; 456 int cs_db, cs_l;
457 457
458 if (!is_pae(vcpu)) { 458 if (!is_pae(vcpu)) {
459 kvm_inject_gp(vcpu, 0); 459 kvm_inject_gp(vcpu, 0);
460 return; 460 return;
461 } 461 }
462 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 462 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
463 if (cs_l) { 463 if (cs_l) {
464 kvm_inject_gp(vcpu, 0); 464 kvm_inject_gp(vcpu, 0);
465 return; 465 return;
466 466
467 } 467 }
468 } else 468 } else
469 #endif 469 #endif
470 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 470 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
471 kvm_inject_gp(vcpu, 0); 471 kvm_inject_gp(vcpu, 0);
472 return; 472 return;
473 } 473 }
474 474
475 } 475 }
476 476
477 kvm_x86_ops->set_cr0(vcpu, cr0); 477 kvm_x86_ops->set_cr0(vcpu, cr0);
478 vcpu->arch.cr0 = cr0; 478 vcpu->arch.cr0 = cr0;
479 479
480 kvm_mmu_reset_context(vcpu); 480 kvm_mmu_reset_context(vcpu);
481 return; 481 return;
482 } 482 }
483 EXPORT_SYMBOL_GPL(kvm_set_cr0); 483 EXPORT_SYMBOL_GPL(kvm_set_cr0);
484 484
485 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 485 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
486 { 486 {
487 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); 487 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
488 } 488 }
489 EXPORT_SYMBOL_GPL(kvm_lmsw); 489 EXPORT_SYMBOL_GPL(kvm_lmsw);
490 490
491 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 491 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
492 { 492 {
493 unsigned long old_cr4 = kvm_read_cr4(vcpu); 493 unsigned long old_cr4 = kvm_read_cr4(vcpu);
494 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 494 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
495 495
496 if (cr4 & CR4_RESERVED_BITS) { 496 if (cr4 & CR4_RESERVED_BITS) {
497 kvm_inject_gp(vcpu, 0); 497 kvm_inject_gp(vcpu, 0);
498 return; 498 return;
499 } 499 }
500 500
501 if (is_long_mode(vcpu)) { 501 if (is_long_mode(vcpu)) {
502 if (!(cr4 & X86_CR4_PAE)) { 502 if (!(cr4 & X86_CR4_PAE)) {
503 kvm_inject_gp(vcpu, 0); 503 kvm_inject_gp(vcpu, 0);
504 return; 504 return;
505 } 505 }
506 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 506 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
507 && ((cr4 ^ old_cr4) & pdptr_bits) 507 && ((cr4 ^ old_cr4) & pdptr_bits)
508 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 508 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
509 kvm_inject_gp(vcpu, 0); 509 kvm_inject_gp(vcpu, 0);
510 return; 510 return;
511 } 511 }
512 512
513 if (cr4 & X86_CR4_VMXE) { 513 if (cr4 & X86_CR4_VMXE) {
514 kvm_inject_gp(vcpu, 0); 514 kvm_inject_gp(vcpu, 0);
515 return; 515 return;
516 } 516 }
517 kvm_x86_ops->set_cr4(vcpu, cr4); 517 kvm_x86_ops->set_cr4(vcpu, cr4);
518 vcpu->arch.cr4 = cr4; 518 vcpu->arch.cr4 = cr4;
519 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; 519 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
520 kvm_mmu_reset_context(vcpu); 520 kvm_mmu_reset_context(vcpu);
521 } 521 }
522 EXPORT_SYMBOL_GPL(kvm_set_cr4); 522 EXPORT_SYMBOL_GPL(kvm_set_cr4);
523 523
524 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 524 void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
525 { 525 {
526 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { 526 if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
527 kvm_mmu_sync_roots(vcpu); 527 kvm_mmu_sync_roots(vcpu);
528 kvm_mmu_flush_tlb(vcpu); 528 kvm_mmu_flush_tlb(vcpu);
529 return; 529 return;
530 } 530 }
531 531
532 if (is_long_mode(vcpu)) { 532 if (is_long_mode(vcpu)) {
533 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 533 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
534 kvm_inject_gp(vcpu, 0); 534 kvm_inject_gp(vcpu, 0);
535 return; 535 return;
536 } 536 }
537 } else { 537 } else {
538 if (is_pae(vcpu)) { 538 if (is_pae(vcpu)) {
539 if (cr3 & CR3_PAE_RESERVED_BITS) { 539 if (cr3 & CR3_PAE_RESERVED_BITS) {
540 kvm_inject_gp(vcpu, 0); 540 kvm_inject_gp(vcpu, 0);
541 return; 541 return;
542 } 542 }
543 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 543 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
544 kvm_inject_gp(vcpu, 0); 544 kvm_inject_gp(vcpu, 0);
545 return; 545 return;
546 } 546 }
547 } 547 }
548 /* 548 /*
549 * We don't check reserved bits in nonpae mode, because 549 * We don't check reserved bits in nonpae mode, because
550 * this isn't enforced, and VMware depends on this. 550 * this isn't enforced, and VMware depends on this.
551 */ 551 */
552 } 552 }
553 553
554 /* 554 /*
555 * Does the new cr3 value map to physical memory? (Note, we 555 * Does the new cr3 value map to physical memory? (Note, we
556 * catch an invalid cr3 even in real-mode, because it would 556 * catch an invalid cr3 even in real-mode, because it would
557 * cause trouble later on when we turn on paging anyway.) 557 * cause trouble later on when we turn on paging anyway.)
558 * 558 *
559 * A real CPU would silently accept an invalid cr3 and would 559 * A real CPU would silently accept an invalid cr3 and would
560 * attempt to use it - with largely undefined (and often hard 560 * attempt to use it - with largely undefined (and often hard
561 * to debug) behavior on the guest side. 561 * to debug) behavior on the guest side.
562 */ 562 */
563 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) 563 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
564 kvm_inject_gp(vcpu, 0); 564 kvm_inject_gp(vcpu, 0);
565 else { 565 else {
566 vcpu->arch.cr3 = cr3; 566 vcpu->arch.cr3 = cr3;
567 vcpu->arch.mmu.new_cr3(vcpu); 567 vcpu->arch.mmu.new_cr3(vcpu);
568 } 568 }
569 } 569 }
570 EXPORT_SYMBOL_GPL(kvm_set_cr3); 570 EXPORT_SYMBOL_GPL(kvm_set_cr3);
571 571
572 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 572 void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
573 { 573 {
574 if (cr8 & CR8_RESERVED_BITS) { 574 if (cr8 & CR8_RESERVED_BITS) {
575 kvm_inject_gp(vcpu, 0); 575 kvm_inject_gp(vcpu, 0);
576 return; 576 return;
577 } 577 }
578 if (irqchip_in_kernel(vcpu->kvm)) 578 if (irqchip_in_kernel(vcpu->kvm))
579 kvm_lapic_set_tpr(vcpu, cr8); 579 kvm_lapic_set_tpr(vcpu, cr8);
580 else 580 else
581 vcpu->arch.cr8 = cr8; 581 vcpu->arch.cr8 = cr8;
582 } 582 }
583 EXPORT_SYMBOL_GPL(kvm_set_cr8); 583 EXPORT_SYMBOL_GPL(kvm_set_cr8);
584 584
585 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) 585 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
586 { 586 {
587 if (irqchip_in_kernel(vcpu->kvm)) 587 if (irqchip_in_kernel(vcpu->kvm))
588 return kvm_lapic_get_cr8(vcpu); 588 return kvm_lapic_get_cr8(vcpu);
589 else 589 else
590 return vcpu->arch.cr8; 590 return vcpu->arch.cr8;
591 } 591 }
592 EXPORT_SYMBOL_GPL(kvm_get_cr8); 592 EXPORT_SYMBOL_GPL(kvm_get_cr8);
593 593
594 static inline u32 bit(int bitno) 594 static inline u32 bit(int bitno)
595 { 595 {
596 return 1 << (bitno & 31); 596 return 1 << (bitno & 31);
597 } 597 }
598 598
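The bit() helper above turns a Linux X86_FEATURE_* number, which encodes word * 32 + bit, into a mask for one 32-bit CPUID register; the & 31 strips the word part, and the caller (see set_efer() below) is responsible for testing the right register. A quick standalone check of the arithmetic, using a hypothetical feature number:

#include <stdint.h>
#include <stdio.h>

static inline uint32_t bit(int bitno)
{
	return 1u << (bitno & 31);
}

int main(void)
{
	/* e.g. a feature defined as word 6, bit 2 is encoded as 6*32 + 2 */
	int feature = 6 * 32 + 2;

	printf("bit(%d) = 0x%08x\n", feature, bit(feature));	/* 0x00000004 */
	return 0;
}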
599 /* 599 /*
600 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 600 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
601 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 601 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
602 * 602 *
603 * This list is modified at module load time to reflect the 603 * This list is modified at module load time to reflect the
604 * capabilities of the host cpu. This capabilities test skips MSRs that are 604 * capabilities of the host cpu. This capabilities test skips MSRs that are
605 * kvm-specific. Those are put in the beginning of the list. 605 * kvm-specific. Those are put in the beginning of the list.
606 */ 606 */
607 607
608 #define KVM_SAVE_MSRS_BEGIN 5 608 #define KVM_SAVE_MSRS_BEGIN 5
609 static u32 msrs_to_save[] = { 609 static u32 msrs_to_save[] = {
610 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 610 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
611 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 611 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
612 HV_X64_MSR_APIC_ASSIST_PAGE, 612 HV_X64_MSR_APIC_ASSIST_PAGE,
613 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 613 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
614 MSR_K6_STAR, 614 MSR_K6_STAR,
615 #ifdef CONFIG_X86_64 615 #ifdef CONFIG_X86_64
616 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 616 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
617 #endif 617 #endif
618 MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA 618 MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
619 }; 619 };
620 620
621 static unsigned num_msrs_to_save; 621 static unsigned num_msrs_to_save;
622 622
623 static u32 emulated_msrs[] = { 623 static u32 emulated_msrs[] = {
624 MSR_IA32_MISC_ENABLE, 624 MSR_IA32_MISC_ENABLE,
625 }; 625 };
626 626
627 static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 627 static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
628 { 628 {
629 if (efer & efer_reserved_bits) { 629 if (efer & efer_reserved_bits) {
630 kvm_inject_gp(vcpu, 0); 630 kvm_inject_gp(vcpu, 0);
631 return; 631 return;
632 } 632 }
633 633
634 if (is_paging(vcpu) 634 if (is_paging(vcpu)
635 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 635 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
636 kvm_inject_gp(vcpu, 0); 636 kvm_inject_gp(vcpu, 0);
637 return; 637 return;
638 } 638 }
639 639
640 if (efer & EFER_FFXSR) { 640 if (efer & EFER_FFXSR) {
641 struct kvm_cpuid_entry2 *feat; 641 struct kvm_cpuid_entry2 *feat;
642 642
643 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 643 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
644 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 644 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
645 kvm_inject_gp(vcpu, 0); 645 kvm_inject_gp(vcpu, 0);
646 return; 646 return;
647 } 647 }
648 } 648 }
649 649
650 if (efer & EFER_SVME) { 650 if (efer & EFER_SVME) {
651 struct kvm_cpuid_entry2 *feat; 651 struct kvm_cpuid_entry2 *feat;
652 652
653 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 653 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
654 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 654 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
655 kvm_inject_gp(vcpu, 0); 655 kvm_inject_gp(vcpu, 0);
656 return; 656 return;
657 } 657 }
658 } 658 }
659 659
660 kvm_x86_ops->set_efer(vcpu, efer); 660 kvm_x86_ops->set_efer(vcpu, efer);
661 661
662 efer &= ~EFER_LMA; 662 efer &= ~EFER_LMA;
663 efer |= vcpu->arch.efer & EFER_LMA; 663 efer |= vcpu->arch.efer & EFER_LMA;
664 664
665 vcpu->arch.efer = efer; 665 vcpu->arch.efer = efer;
666 666
667 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 667 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
668 kvm_mmu_reset_context(vcpu); 668 kvm_mmu_reset_context(vcpu);
669 } 669 }
670 670
671 void kvm_enable_efer_bits(u64 mask) 671 void kvm_enable_efer_bits(u64 mask)
672 { 672 {
673 efer_reserved_bits &= ~mask; 673 efer_reserved_bits &= ~mask;
674 } 674 }
675 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); 675 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
676 676
677 677
678 /* 678 /*
679 * Writes msr value into the appropriate "register". 679 * Writes msr value into the appropriate "register".
680 * Returns 0 on success, non-0 otherwise. 680 * Returns 0 on success, non-0 otherwise.
681 * Assumes vcpu_load() was already called. 681 * Assumes vcpu_load() was already called.
682 */ 682 */
683 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 683 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
684 { 684 {
685 return kvm_x86_ops->set_msr(vcpu, msr_index, data); 685 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
686 } 686 }
687 687
688 /* 688 /*
689 * Adapt set_msr() to msr_io()'s calling convention 689 * Adapt set_msr() to msr_io()'s calling convention
690 */ 690 */
691 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) 691 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
692 { 692 {
693 return kvm_set_msr(vcpu, index, *data); 693 return kvm_set_msr(vcpu, index, *data);
694 } 694 }
695 695
696 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 696 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
697 { 697 {
698 static int version; 698 static int version;
699 struct pvclock_wall_clock wc; 699 struct pvclock_wall_clock wc;
700 struct timespec boot; 700 struct timespec boot;
701 701
702 if (!wall_clock) 702 if (!wall_clock)
703 return; 703 return;
704 704
705 version++; 705 version++;
706 706
707 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 707 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
708 708
709 /* 709 /*
710 * The guest calculates current wall clock time by adding 710 * The guest calculates current wall clock time by adding
711 * system time (updated by kvm_write_guest_time below) to the 711 * system time (updated by kvm_write_guest_time below) to the
712 * wall clock specified here. guest system time equals host 712 * wall clock specified here. guest system time equals host
713 * system time for us, thus we must fill in host boot time here. 713 * system time for us, thus we must fill in host boot time here.
714 */ 714 */
715 getboottime(&boot); 715 getboottime(&boot);
716 716
717 wc.sec = boot.tv_sec; 717 wc.sec = boot.tv_sec;
718 wc.nsec = boot.tv_nsec; 718 wc.nsec = boot.tv_nsec;
719 wc.version = version; 719 wc.version = version;
720 720
721 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); 721 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
722 722
723 version++; 723 version++;
724 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 724 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
725 } 725 }
726 726
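kvm_write_wall_clock() bumps the version field before and after writing the payload, so the version is odd while an update is in flight and even (and larger than before) once it is complete. A guest is expected to read seqlock-style, retrying on an odd or changed version; a minimal userspace-style sketch of such a reader (the struct layout matches the pvclock ABI, the loop itself is illustrative):

#include <stdint.h>
#include <stdio.h>

struct pvclock_wall_clock {
	uint32_t version;
	uint32_t sec;
	uint32_t nsec;
};

/* Illustrative reader: retry until a stable, even version is seen. */
static void read_wall_clock(const volatile struct pvclock_wall_clock *wc,
			    uint32_t *sec, uint32_t *nsec)
{
	uint32_t version;

	do {
		version = wc->version;
		/* a real guest inserts a read barrier here */
		*sec  = wc->sec;
		*nsec = wc->nsec;
	} while ((version & 1) || version != wc->version);
}

int main(void)
{
	/* fake a completed update: version already even */
	struct pvclock_wall_clock wc = { .version = 2, .sec = 100, .nsec = 42 };
	uint32_t sec, nsec;

	read_wall_clock(&wc, &sec, &nsec);
	printf("boot time: %u.%09u\n", sec, nsec);
	return 0;
}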
727 static uint32_t div_frac(uint32_t dividend, uint32_t divisor) 727 static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
728 { 728 {
729 uint32_t quotient, remainder; 729 uint32_t quotient, remainder;
730 730
731 /* Don't try to replace with do_div(), this one calculates 731 /* Don't try to replace with do_div(), this one calculates
732 * "(dividend << 32) / divisor" */ 732 * "(dividend << 32) / divisor" */
733 __asm__ ( "divl %4" 733 __asm__ ( "divl %4"
734 : "=a" (quotient), "=d" (remainder) 734 : "=a" (quotient), "=d" (remainder)
735 : "0" (0), "1" (dividend), "r" (divisor) ); 735 : "0" (0), "1" (dividend), "r" (divisor) );
736 return quotient; 736 return quotient;
737 } 737 }
738 738
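div_frac() divides the 64-bit value (dividend << 32) by divisor using divl's edx:eax input, i.e. it produces dividend/divisor as a 0.32 fixed-point fraction (and, like the hardware instruction, it requires dividend < divisor to avoid overflow). A portable equivalent with one worked value, for illustration:

#include <stdint.h>
#include <stdio.h>

/* same result as the inline-asm div_frac() whenever dividend < divisor */
static uint32_t div_frac_portable(uint32_t dividend, uint32_t divisor)
{
	return (uint32_t)(((uint64_t)dividend << 32) / divisor);
}

int main(void)
{
	/* 1e9 / 1.25e9 = 0.8, so the result should be about 0.8 * 2^32 */
	printf("%u\n", div_frac_portable(1000000000u, 1250000000u));
	/* prints 3435973836 == 0xcccccccc, i.e. 0.8 in 0.32 fixed point */
	return 0;
}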
739 static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) 739 static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
740 { 740 {
741 uint64_t nsecs = 1000000000LL; 741 uint64_t nsecs = 1000000000LL;
742 int32_t shift = 0; 742 int32_t shift = 0;
743 uint64_t tps64; 743 uint64_t tps64;
744 uint32_t tps32; 744 uint32_t tps32;
745 745
746 tps64 = tsc_khz * 1000LL; 746 tps64 = tsc_khz * 1000LL;
747 while (tps64 > nsecs*2) { 747 while (tps64 > nsecs*2) {
748 tps64 >>= 1; 748 tps64 >>= 1;
749 shift--; 749 shift--;
750 } 750 }
751 751
752 tps32 = (uint32_t)tps64; 752 tps32 = (uint32_t)tps64;
753 while (tps32 <= (uint32_t)nsecs) { 753 while (tps32 <= (uint32_t)nsecs) {
754 tps32 <<= 1; 754 tps32 <<= 1;
755 shift++; 755 shift++;
756 } 756 }
757 757
758 hv_clock->tsc_shift = shift; 758 hv_clock->tsc_shift = shift;
759 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); 759 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
760 760
761 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", 761 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
762 __func__, tsc_khz, hv_clock->tsc_shift, 762 __func__, tsc_khz, hv_clock->tsc_shift,
763 hv_clock->tsc_to_system_mul); 763 hv_clock->tsc_to_system_mul);
764 } 764 }
765 765
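kvm_set_time_scale() normalizes the ticks-per-second value into the range (1e9, 2e9] and then stores what is left of the ratio as a shift plus a 0.32 fixed-point multiplier, so that nanoseconds = ((ticks << shift) * mul) >> 32. Working the numbers for an assumed 2.5 GHz TSC, as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tsc_khz = 2500000;	/* assume a 2.5 GHz TSC */
	uint64_t nsecs = 1000000000ULL;
	uint64_t tps64 = (uint64_t)tsc_khz * 1000;
	int32_t shift = 0;
	uint32_t tps32;

	while (tps64 > nsecs * 2) {	/* 2.5e9 -> 1.25e9, shift becomes -1 */
		tps64 >>= 1;
		shift--;
	}
	tps32 = (uint32_t)tps64;
	while (tps32 <= (uint32_t)nsecs) {	/* already > 1e9, no change */
		tps32 <<= 1;
		shift++;
	}

	/* mul = (1e9 << 32) / 1.25e9 = 3435973836, shift = -1, so
	 * ns = ((delta >> 1) * 0.8) = delta / 2.5, as expected for 2.5 GHz */
	printf("tsc_shift=%d tsc_to_system_mul=%u\n",
	       shift, (uint32_t)((nsecs << 32) / tps32));
	return 0;
}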
766 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); 766 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
767 767
768 static void kvm_write_guest_time(struct kvm_vcpu *v) 768 static void kvm_write_guest_time(struct kvm_vcpu *v)
769 { 769 {
770 struct timespec ts; 770 struct timespec ts;
771 unsigned long flags; 771 unsigned long flags;
772 struct kvm_vcpu_arch *vcpu = &v->arch; 772 struct kvm_vcpu_arch *vcpu = &v->arch;
773 void *shared_kaddr; 773 void *shared_kaddr;
774 unsigned long this_tsc_khz; 774 unsigned long this_tsc_khz;
775 775
776 if ((!vcpu->time_page)) 776 if ((!vcpu->time_page))
777 return; 777 return;
778 778
779 this_tsc_khz = get_cpu_var(cpu_tsc_khz); 779 this_tsc_khz = get_cpu_var(cpu_tsc_khz);
780 if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) { 780 if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) {
781 kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); 781 kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock);
782 vcpu->hv_clock_tsc_khz = this_tsc_khz; 782 vcpu->hv_clock_tsc_khz = this_tsc_khz;
783 } 783 }
784 put_cpu_var(cpu_tsc_khz); 784 put_cpu_var(cpu_tsc_khz);
785 785
786 /* Keep irq disabled to prevent changes to the clock */ 786 /* Keep irq disabled to prevent changes to the clock */
787 local_irq_save(flags); 787 local_irq_save(flags);
788 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); 788 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
789 ktime_get_ts(&ts); 789 ktime_get_ts(&ts);
790 monotonic_to_bootbased(&ts); 790 monotonic_to_bootbased(&ts);
791 local_irq_restore(flags); 791 local_irq_restore(flags);
792 792
793 /* With all the info we got, fill in the values */ 793 /* With all the info we got, fill in the values */
794 794
795 vcpu->hv_clock.system_time = ts.tv_nsec + 795 vcpu->hv_clock.system_time = ts.tv_nsec +
796 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; 796 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
797 797
798 /* 798 /*
799 * The interface expects us to write an even number signaling that the 799 * The interface expects us to write an even number signaling that the
800 * update is finished. Since the guest won't see the intermediate 800 * update is finished. Since the guest won't see the intermediate
801 * state, we just increase by 2 at the end. 801 * state, we just increase by 2 at the end.
802 */ 802 */
803 vcpu->hv_clock.version += 2; 803 vcpu->hv_clock.version += 2;
804 804
805 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); 805 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
806 806
807 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 807 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
808 sizeof(vcpu->hv_clock)); 808 sizeof(vcpu->hv_clock));
809 809
810 kunmap_atomic(shared_kaddr, KM_USER0); 810 kunmap_atomic(shared_kaddr, KM_USER0);
811 811
812 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 812 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
813 } 813 }
814 814
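The fields published by kvm_write_guest_time() are meant to be combined by the guest as system_time plus a scaled TSC delta, using the shift/multiplier pair produced by kvm_set_time_scale() above. A hedged sketch of that conversion (this mirrors the pvclock scaling scheme; it is not code copied from the guest side):

#include <stdint.h>
#include <stdio.h>

/* ns elapsed since tsc_timestamp, from the published scale factors.
 * (unsigned __int128 is a gcc/clang extension, used here to keep the
 * 64x32-bit fixed-point multiply exact.) */
static uint64_t pvclock_delta_ns(uint64_t tsc, uint64_t tsc_timestamp,
				 uint32_t mul, int8_t shift)
{
	uint64_t delta = tsc - tsc_timestamp;

	if (shift >= 0)
		delta <<= shift;
	else
		delta >>= -shift;
	return (uint64_t)(((unsigned __int128)delta * mul) >> 32);
}

int main(void)
{
	/* assume the 2.5 GHz scale from the earlier sketch:
	 * shift = -1, mul = 3435973836; feed it one second of TSC ticks */
	printf("%llu ns\n", (unsigned long long)
	       pvclock_delta_ns(2500000000ULL, 0, 3435973836u, -1));
	/* prints 999999999, i.e. one second up to fixed-point rounding */
	return 0;
}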
815 static int kvm_request_guest_time_update(struct kvm_vcpu *v) 815 static int kvm_request_guest_time_update(struct kvm_vcpu *v)
816 { 816 {
817 struct kvm_vcpu_arch *vcpu = &v->arch; 817 struct kvm_vcpu_arch *vcpu = &v->arch;
818 818
819 if (!vcpu->time_page) 819 if (!vcpu->time_page)
820 return 0; 820 return 0;
821 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); 821 set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
822 return 1; 822 return 1;
823 } 823 }
824 824
825 static bool msr_mtrr_valid(unsigned msr) 825 static bool msr_mtrr_valid(unsigned msr)
826 { 826 {
827 switch (msr) { 827 switch (msr) {
828 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: 828 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
829 case MSR_MTRRfix64K_00000: 829 case MSR_MTRRfix64K_00000:
830 case MSR_MTRRfix16K_80000: 830 case MSR_MTRRfix16K_80000:
831 case MSR_MTRRfix16K_A0000: 831 case MSR_MTRRfix16K_A0000:
832 case MSR_MTRRfix4K_C0000: 832 case MSR_MTRRfix4K_C0000:
833 case MSR_MTRRfix4K_C8000: 833 case MSR_MTRRfix4K_C8000:
834 case MSR_MTRRfix4K_D0000: 834 case MSR_MTRRfix4K_D0000:
835 case MSR_MTRRfix4K_D8000: 835 case MSR_MTRRfix4K_D8000:
836 case MSR_MTRRfix4K_E0000: 836 case MSR_MTRRfix4K_E0000:
837 case MSR_MTRRfix4K_E8000: 837 case MSR_MTRRfix4K_E8000:
838 case MSR_MTRRfix4K_F0000: 838 case MSR_MTRRfix4K_F0000:
839 case MSR_MTRRfix4K_F8000: 839 case MSR_MTRRfix4K_F8000:
840 case MSR_MTRRdefType: 840 case MSR_MTRRdefType:
841 case MSR_IA32_CR_PAT: 841 case MSR_IA32_CR_PAT:
842 return true; 842 return true;
843 case 0x2f8: 843 case 0x2f8:
844 return true; 844 return true;
845 } 845 }
846 return false; 846 return false;
847 } 847 }
848 848
849 static bool valid_pat_type(unsigned t) 849 static bool valid_pat_type(unsigned t)
850 { 850 {
851 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ 851 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
852 } 852 }
853 853
854 static bool valid_mtrr_type(unsigned t) 854 static bool valid_mtrr_type(unsigned t)
855 { 855 {
856 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ 856 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
857 } 857 }
858 858
859 static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) 859 static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
860 { 860 {
861 int i; 861 int i;
862 862
863 if (!msr_mtrr_valid(msr)) 863 if (!msr_mtrr_valid(msr))
864 return false; 864 return false;
865 865
866 if (msr == MSR_IA32_CR_PAT) { 866 if (msr == MSR_IA32_CR_PAT) {
867 for (i = 0; i < 8; i++) 867 for (i = 0; i < 8; i++)
868 if (!valid_pat_type((data >> (i * 8)) & 0xff)) 868 if (!valid_pat_type((data >> (i * 8)) & 0xff))
869 return false; 869 return false;
870 return true; 870 return true;
871 } else if (msr == MSR_MTRRdefType) { 871 } else if (msr == MSR_MTRRdefType) {
872 if (data & ~0xcff) 872 if (data & ~0xcff)
873 return false; 873 return false;
874 return valid_mtrr_type(data & 0xff); 874 return valid_mtrr_type(data & 0xff);
875 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) { 875 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
876 for (i = 0; i < 8 ; i++) 876 for (i = 0; i < 8 ; i++)
877 if (!valid_mtrr_type((data >> (i * 8)) & 0xff)) 877 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
878 return false; 878 return false;
879 return true; 879 return true;
880 } 880 }
881 881
882 /* variable MTRRs */ 882 /* variable MTRRs */
883 return valid_mtrr_type(data & 0xff); 883 return valid_mtrr_type(data & 0xff);
884 } 884 }
885 885
886 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 886 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
887 { 887 {
888 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 888 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
889 889
890 if (!mtrr_valid(vcpu, msr, data)) 890 if (!mtrr_valid(vcpu, msr, data))
891 return 1; 891 return 1;
892 892
893 if (msr == MSR_MTRRdefType) { 893 if (msr == MSR_MTRRdefType) {
894 vcpu->arch.mtrr_state.def_type = data; 894 vcpu->arch.mtrr_state.def_type = data;
895 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; 895 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
896 } else if (msr == MSR_MTRRfix64K_00000) 896 } else if (msr == MSR_MTRRfix64K_00000)
897 p[0] = data; 897 p[0] = data;
898 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) 898 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
899 p[1 + msr - MSR_MTRRfix16K_80000] = data; 899 p[1 + msr - MSR_MTRRfix16K_80000] = data;
900 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) 900 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
901 p[3 + msr - MSR_MTRRfix4K_C0000] = data; 901 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
902 else if (msr == MSR_IA32_CR_PAT) 902 else if (msr == MSR_IA32_CR_PAT)
903 vcpu->arch.pat = data; 903 vcpu->arch.pat = data;
904 else { /* Variable MTRRs */ 904 else { /* Variable MTRRs */
905 int idx, is_mtrr_mask; 905 int idx, is_mtrr_mask;
906 u64 *pt; 906 u64 *pt;
907 907
908 idx = (msr - 0x200) / 2; 908 idx = (msr - 0x200) / 2;
909 is_mtrr_mask = msr - 0x200 - 2 * idx; 909 is_mtrr_mask = msr - 0x200 - 2 * idx;
910 if (!is_mtrr_mask) 910 if (!is_mtrr_mask)
911 pt = 911 pt =
912 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; 912 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
913 else 913 else
914 pt = 914 pt =
915 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; 915 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
916 *pt = data; 916 *pt = data;
917 } 917 }
918 918
919 kvm_mmu_reset_context(vcpu); 919 kvm_mmu_reset_context(vcpu);
920 return 0; 920 return 0;
921 } 921 }
922 922
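Variable-range MTRRs are base/mask pairs laid out consecutively from MSR 0x200 (base0, mask0, base1, mask1, ...), so set_msr_mtrr() recovers the pair index as (msr - 0x200) / 2 and the remainder tells it whether the base or the mask half was written. A quick check of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned msr;

	for (msr = 0x200; msr <= 0x205; msr++) {
		int idx = (msr - 0x200) / 2;
		int is_mask = msr - 0x200 - 2 * idx;

		printf("msr 0x%x -> var range %d, %s\n",
		       msr, idx, is_mask ? "mask" : "base");
	}
	return 0;
}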
923 static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) 923 static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
924 { 924 {
925 u64 mcg_cap = vcpu->arch.mcg_cap; 925 u64 mcg_cap = vcpu->arch.mcg_cap;
926 unsigned bank_num = mcg_cap & 0xff; 926 unsigned bank_num = mcg_cap & 0xff;
927 927
928 switch (msr) { 928 switch (msr) {
929 case MSR_IA32_MCG_STATUS: 929 case MSR_IA32_MCG_STATUS:
930 vcpu->arch.mcg_status = data; 930 vcpu->arch.mcg_status = data;
931 break; 931 break;
932 case MSR_IA32_MCG_CTL: 932 case MSR_IA32_MCG_CTL:
933 if (!(mcg_cap & MCG_CTL_P)) 933 if (!(mcg_cap & MCG_CTL_P))
934 return 1; 934 return 1;
935 if (data != 0 && data != ~(u64)0) 935 if (data != 0 && data != ~(u64)0)
936 return -1; 936 return -1;
937 vcpu->arch.mcg_ctl = data; 937 vcpu->arch.mcg_ctl = data;
938 break; 938 break;
939 default: 939 default:
940 if (msr >= MSR_IA32_MC0_CTL && 940 if (msr >= MSR_IA32_MC0_CTL &&
941 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 941 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
942 u32 offset = msr - MSR_IA32_MC0_CTL; 942 u32 offset = msr - MSR_IA32_MC0_CTL;
943 /* only 0 or all 1s can be written to IA32_MCi_CTL 943 /* only 0 or all 1s can be written to IA32_MCi_CTL
944 * some Linux kernels though clear bit 10 in bank 4 to 944 * some Linux kernels though clear bit 10 in bank 4 to
945 * workaround a BIOS/GART TBL issue on AMD K8s, ignore 945 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
946 * this to avoid an uncaught #GP in the guest 946 * this to avoid an uncaught #GP in the guest
947 */ 947 */
948 if ((offset & 0x3) == 0 && 948 if ((offset & 0x3) == 0 &&
949 data != 0 && (data | (1 << 10)) != ~(u64)0) 949 data != 0 && (data | (1 << 10)) != ~(u64)0)
950 return -1; 950 return -1;
951 vcpu->arch.mce_banks[offset] = data; 951 vcpu->arch.mce_banks[offset] = data;
952 break; 952 break;
953 } 953 }
954 return 1; 954 return 1;
955 } 955 }
956 return 0; 956 return 0;
957 } 957 }
958 958
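Machine-check banks expose four consecutive MSRs each (CTL, STATUS, ADDR, MISC) starting at MSR_IA32_MC0_CTL (0x400), so the offset computed in set_msr_mce() is both a direct index into the flat mce_banks[] array and, via offset & 0x3, a way to spot writes to a bank's CTL register. A small illustration with an example MSR number:

#include <stdio.h>

#define MSR_IA32_MC0_CTL 0x400
static const char *reg_name[4] = { "CTL", "STATUS", "ADDR", "MISC" };

int main(void)
{
	unsigned msr = 0x409;			/* example: a bank-2 register */
	unsigned offset = msr - MSR_IA32_MC0_CTL;

	printf("msr 0x%x -> bank %u, %s, is_ctl=%d\n",
	       msr, offset / 4, reg_name[offset & 3], (offset & 3) == 0);
	return 0;
}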
959 static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) 959 static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
960 { 960 {
961 struct kvm *kvm = vcpu->kvm; 961 struct kvm *kvm = vcpu->kvm;
962 int lm = is_long_mode(vcpu); 962 int lm = is_long_mode(vcpu);
963 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 963 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
964 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; 964 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
965 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 965 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
966 : kvm->arch.xen_hvm_config.blob_size_32; 966 : kvm->arch.xen_hvm_config.blob_size_32;
967 u32 page_num = data & ~PAGE_MASK; 967 u32 page_num = data & ~PAGE_MASK;
968 u64 page_addr = data & PAGE_MASK; 968 u64 page_addr = data & PAGE_MASK;
969 u8 *page; 969 u8 *page;
970 int r; 970 int r;
971 971
972 r = -E2BIG; 972 r = -E2BIG;
973 if (page_num >= blob_size) 973 if (page_num >= blob_size)
974 goto out; 974 goto out;
975 r = -ENOMEM; 975 r = -ENOMEM;
976 page = kzalloc(PAGE_SIZE, GFP_KERNEL); 976 page = kzalloc(PAGE_SIZE, GFP_KERNEL);
977 if (!page) 977 if (!page)
978 goto out; 978 goto out;
979 r = -EFAULT; 979 r = -EFAULT;
980 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) 980 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
981 goto out_free; 981 goto out_free;
982 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) 982 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
983 goto out_free; 983 goto out_free;
984 r = 0; 984 r = 0;
985 out_free: 985 out_free:
986 kfree(page); 986 kfree(page);
987 out: 987 out:
988 return r; 988 return r;
989 } 989 }
990 990
991 static bool kvm_hv_hypercall_enabled(struct kvm *kvm) 991 static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
992 { 992 {
993 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; 993 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
994 } 994 }
995 995
996 static bool kvm_hv_msr_partition_wide(u32 msr) 996 static bool kvm_hv_msr_partition_wide(u32 msr)
997 { 997 {
998 bool r = false; 998 bool r = false;
999 switch (msr) { 999 switch (msr) {
1000 case HV_X64_MSR_GUEST_OS_ID: 1000 case HV_X64_MSR_GUEST_OS_ID:
1001 case HV_X64_MSR_HYPERCALL: 1001 case HV_X64_MSR_HYPERCALL:
1002 r = true; 1002 r = true;
1003 break; 1003 break;
1004 } 1004 }
1005 1005
1006 return r; 1006 return r;
1007 } 1007 }
1008 1008
1009 static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1009 static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1010 { 1010 {
1011 struct kvm *kvm = vcpu->kvm; 1011 struct kvm *kvm = vcpu->kvm;
1012 1012
1013 switch (msr) { 1013 switch (msr) {
1014 case HV_X64_MSR_GUEST_OS_ID: 1014 case HV_X64_MSR_GUEST_OS_ID:
1015 kvm->arch.hv_guest_os_id = data; 1015 kvm->arch.hv_guest_os_id = data;
1016 /* setting guest os id to zero disables hypercall page */ 1016 /* setting guest os id to zero disables hypercall page */
1017 if (!kvm->arch.hv_guest_os_id) 1017 if (!kvm->arch.hv_guest_os_id)
1018 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; 1018 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1019 break; 1019 break;
1020 case HV_X64_MSR_HYPERCALL: { 1020 case HV_X64_MSR_HYPERCALL: {
1021 u64 gfn; 1021 u64 gfn;
1022 unsigned long addr; 1022 unsigned long addr;
1023 u8 instructions[4]; 1023 u8 instructions[4];
1024 1024
1025 /* if guest os id is not set hypercall should remain disabled */ 1025 /* if guest os id is not set hypercall should remain disabled */
1026 if (!kvm->arch.hv_guest_os_id) 1026 if (!kvm->arch.hv_guest_os_id)
1027 break; 1027 break;
1028 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { 1028 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1029 kvm->arch.hv_hypercall = data; 1029 kvm->arch.hv_hypercall = data;
1030 break; 1030 break;
1031 } 1031 }
1032 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; 1032 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1033 addr = gfn_to_hva(kvm, gfn); 1033 addr = gfn_to_hva(kvm, gfn);
1034 if (kvm_is_error_hva(addr)) 1034 if (kvm_is_error_hva(addr))
1035 return 1; 1035 return 1;
1036 kvm_x86_ops->patch_hypercall(vcpu, instructions); 1036 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1037 ((unsigned char *)instructions)[3] = 0xc3; /* ret */ 1037 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1038 if (copy_to_user((void __user *)addr, instructions, 4)) 1038 if (copy_to_user((void __user *)addr, instructions, 4))
1039 return 1; 1039 return 1;
1040 kvm->arch.hv_hypercall = data; 1040 kvm->arch.hv_hypercall = data;
1041 break; 1041 break;
1042 } 1042 }
1043 default: 1043 default:
1044 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1044 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1045 "data 0x%llx\n", msr, data); 1045 "data 0x%llx\n", msr, data);
1046 return 1; 1046 return 1;
1047 } 1047 }
1048 return 0; 1048 return 0;
1049 } 1049 }
1050 1050
1051 static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1051 static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1052 { 1052 {
1053 switch (msr) { 1053 switch (msr) {
1054 case HV_X64_MSR_APIC_ASSIST_PAGE: { 1054 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1055 unsigned long addr; 1055 unsigned long addr;
1056 1056
1057 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { 1057 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1058 vcpu->arch.hv_vapic = data; 1058 vcpu->arch.hv_vapic = data;
1059 break; 1059 break;
1060 } 1060 }
1061 addr = gfn_to_hva(vcpu->kvm, data >> 1061 addr = gfn_to_hva(vcpu->kvm, data >>
1062 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); 1062 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1063 if (kvm_is_error_hva(addr)) 1063 if (kvm_is_error_hva(addr))
1064 return 1; 1064 return 1;
1065 if (clear_user((void __user *)addr, PAGE_SIZE)) 1065 if (clear_user((void __user *)addr, PAGE_SIZE))
1066 return 1; 1066 return 1;
1067 vcpu->arch.hv_vapic = data; 1067 vcpu->arch.hv_vapic = data;
1068 break; 1068 break;
1069 } 1069 }
1070 case HV_X64_MSR_EOI: 1070 case HV_X64_MSR_EOI:
1071 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); 1071 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1072 case HV_X64_MSR_ICR: 1072 case HV_X64_MSR_ICR:
1073 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); 1073 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1074 case HV_X64_MSR_TPR: 1074 case HV_X64_MSR_TPR:
1075 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); 1075 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1076 default: 1076 default:
1077 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " 1077 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1078 "data 0x%llx\n", msr, data); 1078 "data 0x%llx\n", msr, data);
1079 return 1; 1079 return 1;
1080 } 1080 }
1081 1081
1082 return 0; 1082 return 0;
1083 } 1083 }
1084 1084
1085 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1085 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1086 { 1086 {
1087 switch (msr) { 1087 switch (msr) {
1088 case MSR_EFER: 1088 case MSR_EFER:
1089 set_efer(vcpu, data); 1089 set_efer(vcpu, data);
1090 break; 1090 break;
1091 case MSR_K7_HWCR: 1091 case MSR_K7_HWCR:
1092 data &= ~(u64)0x40; /* ignore flush filter disable */ 1092 data &= ~(u64)0x40; /* ignore flush filter disable */
1093 if (data != 0) { 1093 if (data != 0) {
1094 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1094 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1095 data); 1095 data);
1096 return 1; 1096 return 1;
1097 } 1097 }
1098 break; 1098 break;
1099 case MSR_FAM10H_MMIO_CONF_BASE: 1099 case MSR_FAM10H_MMIO_CONF_BASE:
1100 if (data != 0) { 1100 if (data != 0) {
1101 pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: " 1101 pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
1102 "0x%llx\n", data); 1102 "0x%llx\n", data);
1103 return 1; 1103 return 1;
1104 } 1104 }
1105 break; 1105 break;
1106 case MSR_AMD64_NB_CFG: 1106 case MSR_AMD64_NB_CFG:
1107 break; 1107 break;
1108 case MSR_IA32_DEBUGCTLMSR: 1108 case MSR_IA32_DEBUGCTLMSR:
1109 if (!data) { 1109 if (!data) {
1110 /* We support the non-activated case already */ 1110 /* We support the non-activated case already */
1111 break; 1111 break;
1112 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { 1112 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
1113 /* Values other than LBR and BTF are vendor-specific, 1113 /* Values other than LBR and BTF are vendor-specific,
1114 thus reserved and should throw a #GP */ 1114 thus reserved and should throw a #GP */
1115 return 1; 1115 return 1;
1116 } 1116 }
1117 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", 1117 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
1118 __func__, data); 1118 __func__, data);
1119 break; 1119 break;
1120 case MSR_IA32_UCODE_REV: 1120 case MSR_IA32_UCODE_REV:
1121 case MSR_IA32_UCODE_WRITE: 1121 case MSR_IA32_UCODE_WRITE:
1122 case MSR_VM_HSAVE_PA: 1122 case MSR_VM_HSAVE_PA:
1123 case MSR_AMD64_PATCH_LOADER: 1123 case MSR_AMD64_PATCH_LOADER:
1124 break; 1124 break;
1125 case 0x200 ... 0x2ff: 1125 case 0x200 ... 0x2ff:
1126 return set_msr_mtrr(vcpu, msr, data); 1126 return set_msr_mtrr(vcpu, msr, data);
1127 case MSR_IA32_APICBASE: 1127 case MSR_IA32_APICBASE:
1128 kvm_set_apic_base(vcpu, data); 1128 kvm_set_apic_base(vcpu, data);
1129 break; 1129 break;
1130 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: 1130 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1131 return kvm_x2apic_msr_write(vcpu, msr, data); 1131 return kvm_x2apic_msr_write(vcpu, msr, data);
1132 case MSR_IA32_MISC_ENABLE: 1132 case MSR_IA32_MISC_ENABLE:
1133 vcpu->arch.ia32_misc_enable_msr = data; 1133 vcpu->arch.ia32_misc_enable_msr = data;
1134 break; 1134 break;
1135 case MSR_KVM_WALL_CLOCK: 1135 case MSR_KVM_WALL_CLOCK:
1136 vcpu->kvm->arch.wall_clock = data; 1136 vcpu->kvm->arch.wall_clock = data;
1137 kvm_write_wall_clock(vcpu->kvm, data); 1137 kvm_write_wall_clock(vcpu->kvm, data);
1138 break; 1138 break;
1139 case MSR_KVM_SYSTEM_TIME: { 1139 case MSR_KVM_SYSTEM_TIME: {
1140 if (vcpu->arch.time_page) { 1140 if (vcpu->arch.time_page) {
1141 kvm_release_page_dirty(vcpu->arch.time_page); 1141 kvm_release_page_dirty(vcpu->arch.time_page);
1142 vcpu->arch.time_page = NULL; 1142 vcpu->arch.time_page = NULL;
1143 } 1143 }
1144 1144
1145 vcpu->arch.time = data; 1145 vcpu->arch.time = data;
1146 1146
1147 /* we verify if the enable bit is set... */ 1147 /* we verify if the enable bit is set... */
1148 if (!(data & 1)) 1148 if (!(data & 1))
1149 break; 1149 break;
1150 1150
1151 /* ...but clean it before doing the actual write */ 1151 /* ...but clean it before doing the actual write */
1152 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); 1152 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1153 1153
1154 vcpu->arch.time_page = 1154 vcpu->arch.time_page =
1155 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); 1155 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
1156 1156
1157 if (is_error_page(vcpu->arch.time_page)) { 1157 if (is_error_page(vcpu->arch.time_page)) {
1158 kvm_release_page_clean(vcpu->arch.time_page); 1158 kvm_release_page_clean(vcpu->arch.time_page);
1159 vcpu->arch.time_page = NULL; 1159 vcpu->arch.time_page = NULL;
1160 } 1160 }
1161 1161
1162 kvm_request_guest_time_update(vcpu); 1162 kvm_request_guest_time_update(vcpu);
1163 break; 1163 break;
1164 } 1164 }
1165 case MSR_IA32_MCG_CTL: 1165 case MSR_IA32_MCG_CTL:
1166 case MSR_IA32_MCG_STATUS: 1166 case MSR_IA32_MCG_STATUS:
1167 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1167 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1168 return set_msr_mce(vcpu, msr, data); 1168 return set_msr_mce(vcpu, msr, data);
1169 1169
1170 /* Performance counters are not protected by a CPUID bit, 1170 /* Performance counters are not protected by a CPUID bit,
1171 * so we should check all of them in the generic path for the sake of 1171 * so we should check all of them in the generic path for the sake of
1172 * cross vendor migration. 1172 * cross vendor migration.
1173 * Writing a zero into the event select MSRs disables them, 1173 * Writing a zero into the event select MSRs disables them,
1174 * which we perfectly emulate ;-). Any other value should be at least 1174 * which we perfectly emulate ;-). Any other value should be at least
1175 * reported, since some guests depend on them. 1175 * reported, since some guests depend on them.
1176 */ 1176 */
1177 case MSR_P6_EVNTSEL0: 1177 case MSR_P6_EVNTSEL0:
1178 case MSR_P6_EVNTSEL1: 1178 case MSR_P6_EVNTSEL1:
1179 case MSR_K7_EVNTSEL0: 1179 case MSR_K7_EVNTSEL0:
1180 case MSR_K7_EVNTSEL1: 1180 case MSR_K7_EVNTSEL1:
1181 case MSR_K7_EVNTSEL2: 1181 case MSR_K7_EVNTSEL2:
1182 case MSR_K7_EVNTSEL3: 1182 case MSR_K7_EVNTSEL3:
1183 if (data != 0) 1183 if (data != 0)
1184 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1184 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1185 "0x%x data 0x%llx\n", msr, data); 1185 "0x%x data 0x%llx\n", msr, data);
1186 break; 1186 break;
1187 /* at least RHEL 4 unconditionally writes to the perfctr registers, 1187 /* at least RHEL 4 unconditionally writes to the perfctr registers,
1188 * so we ignore writes to make it happy. 1188 * so we ignore writes to make it happy.
1189 */ 1189 */
1190 case MSR_P6_PERFCTR0: 1190 case MSR_P6_PERFCTR0:
1191 case MSR_P6_PERFCTR1: 1191 case MSR_P6_PERFCTR1:
1192 case MSR_K7_PERFCTR0: 1192 case MSR_K7_PERFCTR0:
1193 case MSR_K7_PERFCTR1: 1193 case MSR_K7_PERFCTR1:
1194 case MSR_K7_PERFCTR2: 1194 case MSR_K7_PERFCTR2:
1195 case MSR_K7_PERFCTR3: 1195 case MSR_K7_PERFCTR3:
1196 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1196 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1197 "0x%x data 0x%llx\n", msr, data); 1197 "0x%x data 0x%llx\n", msr, data);
1198 break; 1198 break;
1199 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 1199 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1200 if (kvm_hv_msr_partition_wide(msr)) { 1200 if (kvm_hv_msr_partition_wide(msr)) {
1201 int r; 1201 int r;
1202 mutex_lock(&vcpu->kvm->lock); 1202 mutex_lock(&vcpu->kvm->lock);
1203 r = set_msr_hyperv_pw(vcpu, msr, data); 1203 r = set_msr_hyperv_pw(vcpu, msr, data);
1204 mutex_unlock(&vcpu->kvm->lock); 1204 mutex_unlock(&vcpu->kvm->lock);
1205 return r; 1205 return r;
1206 } else 1206 } else
1207 return set_msr_hyperv(vcpu, msr, data); 1207 return set_msr_hyperv(vcpu, msr, data);
1208 break; 1208 break;
1209 default: 1209 default:
1210 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1210 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1211 return xen_hvm_config(vcpu, data); 1211 return xen_hvm_config(vcpu, data);
1212 if (!ignore_msrs) { 1212 if (!ignore_msrs) {
1213 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", 1213 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
1214 msr, data); 1214 msr, data);
1215 return 1; 1215 return 1;
1216 } else { 1216 } else {
1217 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", 1217 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
1218 msr, data); 1218 msr, data);
1219 break; 1219 break;
1220 } 1220 }
1221 } 1221 }
1222 return 0; 1222 return 0;
1223 } 1223 }
1224 EXPORT_SYMBOL_GPL(kvm_set_msr_common); 1224 EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1225 1225
1226 1226
1227 /* 1227 /*
1228 * Reads an msr value (of 'msr_index') into 'pdata'. 1228 * Reads an msr value (of 'msr_index') into 'pdata'.
1229 * Returns 0 on success, non-0 otherwise. 1229 * Returns 0 on success, non-0 otherwise.
1230 * Assumes vcpu_load() was already called. 1230 * Assumes vcpu_load() was already called.
1231 */ 1231 */
1232 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 1232 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1233 { 1233 {
1234 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); 1234 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1235 } 1235 }
1236 1236
1237 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1237 static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1238 { 1238 {
1239 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 1239 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1240 1240
1241 if (!msr_mtrr_valid(msr)) 1241 if (!msr_mtrr_valid(msr))
1242 return 1; 1242 return 1;
1243 1243
1244 if (msr == MSR_MTRRdefType) 1244 if (msr == MSR_MTRRdefType)
1245 *pdata = vcpu->arch.mtrr_state.def_type + 1245 *pdata = vcpu->arch.mtrr_state.def_type +
1246 (vcpu->arch.mtrr_state.enabled << 10); 1246 (vcpu->arch.mtrr_state.enabled << 10);
1247 else if (msr == MSR_MTRRfix64K_00000) 1247 else if (msr == MSR_MTRRfix64K_00000)
1248 *pdata = p[0]; 1248 *pdata = p[0];
1249 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) 1249 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1250 *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; 1250 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
1251 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) 1251 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1252 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; 1252 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
1253 else if (msr == MSR_IA32_CR_PAT) 1253 else if (msr == MSR_IA32_CR_PAT)
1254 *pdata = vcpu->arch.pat; 1254 *pdata = vcpu->arch.pat;
1255 else { /* Variable MTRRs */ 1255 else { /* Variable MTRRs */
1256 int idx, is_mtrr_mask; 1256 int idx, is_mtrr_mask;
1257 u64 *pt; 1257 u64 *pt;
1258 1258
1259 idx = (msr - 0x200) / 2; 1259 idx = (msr - 0x200) / 2;
1260 is_mtrr_mask = msr - 0x200 - 2 * idx; 1260 is_mtrr_mask = msr - 0x200 - 2 * idx;
1261 if (!is_mtrr_mask) 1261 if (!is_mtrr_mask)
1262 pt = 1262 pt =
1263 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; 1263 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1264 else 1264 else
1265 pt = 1265 pt =
1266 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; 1266 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1267 *pdata = *pt; 1267 *pdata = *pt;
1268 } 1268 }
1269 1269
1270 return 0; 1270 return 0;
1271 } 1271 }
1272 1272
1273 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1273 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1274 { 1274 {
1275 u64 data; 1275 u64 data;
1276 u64 mcg_cap = vcpu->arch.mcg_cap; 1276 u64 mcg_cap = vcpu->arch.mcg_cap;
1277 unsigned bank_num = mcg_cap & 0xff; 1277 unsigned bank_num = mcg_cap & 0xff;
1278 1278
1279 switch (msr) { 1279 switch (msr) {
1280 case MSR_IA32_P5_MC_ADDR: 1280 case MSR_IA32_P5_MC_ADDR:
1281 case MSR_IA32_P5_MC_TYPE: 1281 case MSR_IA32_P5_MC_TYPE:
1282 data = 0; 1282 data = 0;
1283 break; 1283 break;
1284 case MSR_IA32_MCG_CAP: 1284 case MSR_IA32_MCG_CAP:
1285 data = vcpu->arch.mcg_cap; 1285 data = vcpu->arch.mcg_cap;
1286 break; 1286 break;
1287 case MSR_IA32_MCG_CTL: 1287 case MSR_IA32_MCG_CTL:
1288 if (!(mcg_cap & MCG_CTL_P)) 1288 if (!(mcg_cap & MCG_CTL_P))
1289 return 1; 1289 return 1;
1290 data = vcpu->arch.mcg_ctl; 1290 data = vcpu->arch.mcg_ctl;
1291 break; 1291 break;
1292 case MSR_IA32_MCG_STATUS: 1292 case MSR_IA32_MCG_STATUS:
1293 data = vcpu->arch.mcg_status; 1293 data = vcpu->arch.mcg_status;
1294 break; 1294 break;
1295 default: 1295 default:
1296 if (msr >= MSR_IA32_MC0_CTL && 1296 if (msr >= MSR_IA32_MC0_CTL &&
1297 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 1297 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1298 u32 offset = msr - MSR_IA32_MC0_CTL; 1298 u32 offset = msr - MSR_IA32_MC0_CTL;
1299 data = vcpu->arch.mce_banks[offset]; 1299 data = vcpu->arch.mce_banks[offset];
1300 break; 1300 break;
1301 } 1301 }
1302 return 1; 1302 return 1;
1303 } 1303 }
1304 *pdata = data; 1304 *pdata = data;
1305 return 0; 1305 return 0;
1306 } 1306 }
1307 1307
1308 static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1308 static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1309 { 1309 {
1310 u64 data = 0; 1310 u64 data = 0;
1311 struct kvm *kvm = vcpu->kvm; 1311 struct kvm *kvm = vcpu->kvm;
1312 1312
1313 switch (msr) { 1313 switch (msr) {
1314 case HV_X64_MSR_GUEST_OS_ID: 1314 case HV_X64_MSR_GUEST_OS_ID:
1315 data = kvm->arch.hv_guest_os_id; 1315 data = kvm->arch.hv_guest_os_id;
1316 break; 1316 break;
1317 case HV_X64_MSR_HYPERCALL: 1317 case HV_X64_MSR_HYPERCALL:
1318 data = kvm->arch.hv_hypercall; 1318 data = kvm->arch.hv_hypercall;
1319 break; 1319 break;
1320 default: 1320 default:
1321 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1321 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1322 return 1; 1322 return 1;
1323 } 1323 }
1324 1324
1325 *pdata = data; 1325 *pdata = data;
1326 return 0; 1326 return 0;
1327 } 1327 }
1328 1328
1329 static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1329 static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1330 { 1330 {
1331 u64 data = 0; 1331 u64 data = 0;
1332 1332
1333 switch (msr) { 1333 switch (msr) {
1334 case HV_X64_MSR_VP_INDEX: { 1334 case HV_X64_MSR_VP_INDEX: {
1335 int r; 1335 int r;
1336 struct kvm_vcpu *v; 1336 struct kvm_vcpu *v;
1337 kvm_for_each_vcpu(r, v, vcpu->kvm) 1337 kvm_for_each_vcpu(r, v, vcpu->kvm)
1338 if (v == vcpu) 1338 if (v == vcpu)
1339 data = r; 1339 data = r;
1340 break; 1340 break;
1341 } 1341 }
1342 case HV_X64_MSR_EOI: 1342 case HV_X64_MSR_EOI:
1343 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); 1343 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1344 case HV_X64_MSR_ICR: 1344 case HV_X64_MSR_ICR:
1345 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); 1345 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1346 case HV_X64_MSR_TPR: 1346 case HV_X64_MSR_TPR:
1347 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); 1347 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1348 default: 1348 default:
1349 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); 1349 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1350 return 1; 1350 return 1;
1351 } 1351 }
1352 *pdata = data; 1352 *pdata = data;
1353 return 0; 1353 return 0;
1354 } 1354 }
1355 1355
1356 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1356 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1357 { 1357 {
1358 u64 data; 1358 u64 data;
1359 1359
1360 switch (msr) { 1360 switch (msr) {
1361 case MSR_IA32_PLATFORM_ID: 1361 case MSR_IA32_PLATFORM_ID:
1362 case MSR_IA32_UCODE_REV: 1362 case MSR_IA32_UCODE_REV:
1363 case MSR_IA32_EBL_CR_POWERON: 1363 case MSR_IA32_EBL_CR_POWERON:
1364 case MSR_IA32_DEBUGCTLMSR: 1364 case MSR_IA32_DEBUGCTLMSR:
1365 case MSR_IA32_LASTBRANCHFROMIP: 1365 case MSR_IA32_LASTBRANCHFROMIP:
1366 case MSR_IA32_LASTBRANCHTOIP: 1366 case MSR_IA32_LASTBRANCHTOIP:
1367 case MSR_IA32_LASTINTFROMIP: 1367 case MSR_IA32_LASTINTFROMIP:
1368 case MSR_IA32_LASTINTTOIP: 1368 case MSR_IA32_LASTINTTOIP:
1369 case MSR_K8_SYSCFG: 1369 case MSR_K8_SYSCFG:
1370 case MSR_K7_HWCR: 1370 case MSR_K7_HWCR:
1371 case MSR_VM_HSAVE_PA: 1371 case MSR_VM_HSAVE_PA:
1372 case MSR_P6_PERFCTR0: 1372 case MSR_P6_PERFCTR0:
1373 case MSR_P6_PERFCTR1: 1373 case MSR_P6_PERFCTR1:
1374 case MSR_P6_EVNTSEL0: 1374 case MSR_P6_EVNTSEL0:
1375 case MSR_P6_EVNTSEL1: 1375 case MSR_P6_EVNTSEL1:
1376 case MSR_K7_EVNTSEL0: 1376 case MSR_K7_EVNTSEL0:
1377 case MSR_K7_PERFCTR0: 1377 case MSR_K7_PERFCTR0:
1378 case MSR_K8_INT_PENDING_MSG: 1378 case MSR_K8_INT_PENDING_MSG:
1379 case MSR_AMD64_NB_CFG: 1379 case MSR_AMD64_NB_CFG:
1380 case MSR_FAM10H_MMIO_CONF_BASE: 1380 case MSR_FAM10H_MMIO_CONF_BASE:
1381 data = 0; 1381 data = 0;
1382 break; 1382 break;
1383 case MSR_MTRRcap: 1383 case MSR_MTRRcap:
1384 data = 0x500 | KVM_NR_VAR_MTRR; 1384 data = 0x500 | KVM_NR_VAR_MTRR;
1385 break; 1385 break;
1386 case 0x200 ... 0x2ff: 1386 case 0x200 ... 0x2ff:
1387 return get_msr_mtrr(vcpu, msr, pdata); 1387 return get_msr_mtrr(vcpu, msr, pdata);
1388 case 0xcd: /* fsb frequency */ 1388 case 0xcd: /* fsb frequency */
1389 data = 3; 1389 data = 3;
1390 break; 1390 break;
1391 case MSR_IA32_APICBASE: 1391 case MSR_IA32_APICBASE:
1392 data = kvm_get_apic_base(vcpu); 1392 data = kvm_get_apic_base(vcpu);
1393 break; 1393 break;
1394 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: 1394 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1395 return kvm_x2apic_msr_read(vcpu, msr, pdata); 1395 return kvm_x2apic_msr_read(vcpu, msr, pdata);
1396 break; 1396 break;
1397 case MSR_IA32_MISC_ENABLE: 1397 case MSR_IA32_MISC_ENABLE:
1398 data = vcpu->arch.ia32_misc_enable_msr; 1398 data = vcpu->arch.ia32_misc_enable_msr;
1399 break; 1399 break;
1400 case MSR_IA32_PERF_STATUS: 1400 case MSR_IA32_PERF_STATUS:
1401 /* TSC increment by tick */ 1401 /* TSC increment by tick */
1402 data = 1000ULL; 1402 data = 1000ULL;
1403 /* CPU multiplier */ 1403 /* CPU multiplier */
1404 data |= (((uint64_t)4ULL) << 40); 1404 data |= (((uint64_t)4ULL) << 40);
1405 break; 1405 break;
1406 case MSR_EFER: 1406 case MSR_EFER:
1407 data = vcpu->arch.efer; 1407 data = vcpu->arch.efer;
1408 break; 1408 break;
1409 case MSR_KVM_WALL_CLOCK: 1409 case MSR_KVM_WALL_CLOCK:
1410 data = vcpu->kvm->arch.wall_clock; 1410 data = vcpu->kvm->arch.wall_clock;
1411 break; 1411 break;
1412 case MSR_KVM_SYSTEM_TIME: 1412 case MSR_KVM_SYSTEM_TIME:
1413 data = vcpu->arch.time; 1413 data = vcpu->arch.time;
1414 break; 1414 break;
1415 case MSR_IA32_P5_MC_ADDR: 1415 case MSR_IA32_P5_MC_ADDR:
1416 case MSR_IA32_P5_MC_TYPE: 1416 case MSR_IA32_P5_MC_TYPE:
1417 case MSR_IA32_MCG_CAP: 1417 case MSR_IA32_MCG_CAP:
1418 case MSR_IA32_MCG_CTL: 1418 case MSR_IA32_MCG_CTL:
1419 case MSR_IA32_MCG_STATUS: 1419 case MSR_IA32_MCG_STATUS:
1420 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1420 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1421 return get_msr_mce(vcpu, msr, pdata); 1421 return get_msr_mce(vcpu, msr, pdata);
1422 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: 1422 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1423 if (kvm_hv_msr_partition_wide(msr)) { 1423 if (kvm_hv_msr_partition_wide(msr)) {
1424 int r; 1424 int r;
1425 mutex_lock(&vcpu->kvm->lock); 1425 mutex_lock(&vcpu->kvm->lock);
1426 r = get_msr_hyperv_pw(vcpu, msr, pdata); 1426 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1427 mutex_unlock(&vcpu->kvm->lock); 1427 mutex_unlock(&vcpu->kvm->lock);
1428 return r; 1428 return r;
1429 } else 1429 } else
1430 return get_msr_hyperv(vcpu, msr, pdata); 1430 return get_msr_hyperv(vcpu, msr, pdata);
1431 break; 1431 break;
1432 default: 1432 default:
1433 if (!ignore_msrs) { 1433 if (!ignore_msrs) {
1434 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1434 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
1435 return 1; 1435 return 1;
1436 } else { 1436 } else {
1437 pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr); 1437 pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
1438 data = 0; 1438 data = 0;
1439 } 1439 }
1440 break; 1440 break;
1441 } 1441 }
1442 *pdata = data; 1442 *pdata = data;
1443 return 0; 1443 return 0;
1444 } 1444 }
1445 EXPORT_SYMBOL_GPL(kvm_get_msr_common); 1445 EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1446 1446
1447 /* 1447 /*
1448 * Read or write a bunch of msrs. All parameters are kernel addresses. 1448 * Read or write a bunch of msrs. All parameters are kernel addresses.
1449 * 1449 *
1450 * @return number of msrs set successfully. 1450 * @return number of msrs set successfully.
1451 */ 1451 */
1452 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, 1452 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1453 struct kvm_msr_entry *entries, 1453 struct kvm_msr_entry *entries,
1454 int (*do_msr)(struct kvm_vcpu *vcpu, 1454 int (*do_msr)(struct kvm_vcpu *vcpu,
1455 unsigned index, u64 *data)) 1455 unsigned index, u64 *data))
1456 { 1456 {
1457 int i, idx; 1457 int i, idx;
1458 1458
1459 vcpu_load(vcpu); 1459 vcpu_load(vcpu);
1460 1460
1461 idx = srcu_read_lock(&vcpu->kvm->srcu); 1461 idx = srcu_read_lock(&vcpu->kvm->srcu);
1462 for (i = 0; i < msrs->nmsrs; ++i) 1462 for (i = 0; i < msrs->nmsrs; ++i)
1463 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1463 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1464 break; 1464 break;
1465 srcu_read_unlock(&vcpu->kvm->srcu, idx); 1465 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1466 1466
1467 vcpu_put(vcpu); 1467 vcpu_put(vcpu);
1468 1468
1469 return i; 1469 return i;
1470 } 1470 }
1471 1471
1472 /* 1472 /*
1473 * Read or write a bunch of msrs. Parameters are user addresses. 1473 * Read or write a bunch of msrs. Parameters are user addresses.
1474 * 1474 *
1475 * @return number of msrs set successfully. 1475 * @return number of msrs set successfully.
1476 */ 1476 */
1477 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, 1477 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
1478 int (*do_msr)(struct kvm_vcpu *vcpu, 1478 int (*do_msr)(struct kvm_vcpu *vcpu,
1479 unsigned index, u64 *data), 1479 unsigned index, u64 *data),
1480 int writeback) 1480 int writeback)
1481 { 1481 {
1482 struct kvm_msrs msrs; 1482 struct kvm_msrs msrs;
1483 struct kvm_msr_entry *entries; 1483 struct kvm_msr_entry *entries;
1484 int r, n; 1484 int r, n;
1485 unsigned size; 1485 unsigned size;
1486 1486
1487 r = -EFAULT; 1487 r = -EFAULT;
1488 if (copy_from_user(&msrs, user_msrs, sizeof msrs)) 1488 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
1489 goto out; 1489 goto out;
1490 1490
1491 r = -E2BIG; 1491 r = -E2BIG;
1492 if (msrs.nmsrs >= MAX_IO_MSRS) 1492 if (msrs.nmsrs >= MAX_IO_MSRS)
1493 goto out; 1493 goto out;
1494 1494
1495 r = -ENOMEM; 1495 r = -ENOMEM;
1496 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; 1496 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
1497 entries = vmalloc(size); 1497 entries = vmalloc(size);
1498 if (!entries) 1498 if (!entries)
1499 goto out; 1499 goto out;
1500 1500
1501 r = -EFAULT; 1501 r = -EFAULT;
1502 if (copy_from_user(entries, user_msrs->entries, size)) 1502 if (copy_from_user(entries, user_msrs->entries, size))
1503 goto out_free; 1503 goto out_free;
1504 1504
1505 r = n = __msr_io(vcpu, &msrs, entries, do_msr); 1505 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
1506 if (r < 0) 1506 if (r < 0)
1507 goto out_free; 1507 goto out_free;
1508 1508
1509 r = -EFAULT; 1509 r = -EFAULT;
1510 if (writeback && copy_to_user(user_msrs->entries, entries, size)) 1510 if (writeback && copy_to_user(user_msrs->entries, entries, size))
1511 goto out_free; 1511 goto out_free;
1512 1512
1513 r = n; 1513 r = n;
1514 1514
1515 out_free: 1515 out_free:
1516 vfree(entries); 1516 vfree(entries);
1517 out: 1517 out:
1518 return r; 1518 return r;
1519 } 1519 }
1520 1520
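msr_io() is what backs the KVM_GET_MSRS/KVM_SET_MSRS vcpu ioctls: userspace passes a struct kvm_msrs header followed by an array of struct kvm_msr_entry, and the ioctl's return value is the number of entries processed. A hypothetical userspace fragment reading a single MSR could look like the sketch below; the wrapper struct and helper name are illustrative and not taken from this patch.

/* Illustrative userspace sketch, assuming an already-created vcpu fd. */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <err.h>

static __u64 read_one_msr(int vcpu_fd, __u32 index)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;	/* lands on hdr.entries[0] */
	} req = {
		.hdr.nmsrs   = 1,
		.entry.index = index,
	};

	/* msr_io() returns the number of MSRs it handled; expect 1. */
	if (ioctl(vcpu_fd, KVM_GET_MSRS, &req) != 1)
		err(1, "KVM_GET_MSRS");

	return req.entry.data;
}
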
1521 int kvm_dev_ioctl_check_extension(long ext) 1521 int kvm_dev_ioctl_check_extension(long ext)
1522 { 1522 {
1523 int r; 1523 int r;
1524 1524
1525 switch (ext) { 1525 switch (ext) {
1526 case KVM_CAP_IRQCHIP: 1526 case KVM_CAP_IRQCHIP:
1527 case KVM_CAP_HLT: 1527 case KVM_CAP_HLT:
1528 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 1528 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
1529 case KVM_CAP_SET_TSS_ADDR: 1529 case KVM_CAP_SET_TSS_ADDR:
1530 case KVM_CAP_EXT_CPUID: 1530 case KVM_CAP_EXT_CPUID:
1531 case KVM_CAP_CLOCKSOURCE: 1531 case KVM_CAP_CLOCKSOURCE:
1532 case KVM_CAP_PIT: 1532 case KVM_CAP_PIT:
1533 case KVM_CAP_NOP_IO_DELAY: 1533 case KVM_CAP_NOP_IO_DELAY:
1534 case KVM_CAP_MP_STATE: 1534 case KVM_CAP_MP_STATE:
1535 case KVM_CAP_SYNC_MMU: 1535 case KVM_CAP_SYNC_MMU:
1536 case KVM_CAP_REINJECT_CONTROL: 1536 case KVM_CAP_REINJECT_CONTROL:
1537 case KVM_CAP_IRQ_INJECT_STATUS: 1537 case KVM_CAP_IRQ_INJECT_STATUS:
1538 case KVM_CAP_ASSIGN_DEV_IRQ: 1538 case KVM_CAP_ASSIGN_DEV_IRQ:
1539 case KVM_CAP_IRQFD: 1539 case KVM_CAP_IRQFD:
1540 case KVM_CAP_IOEVENTFD: 1540 case KVM_CAP_IOEVENTFD:
1541 case KVM_CAP_PIT2: 1541 case KVM_CAP_PIT2:
1542 case KVM_CAP_PIT_STATE2: 1542 case KVM_CAP_PIT_STATE2:
1543 case KVM_CAP_SET_IDENTITY_MAP_ADDR: 1543 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
1544 case KVM_CAP_XEN_HVM: 1544 case KVM_CAP_XEN_HVM:
1545 case KVM_CAP_ADJUST_CLOCK: 1545 case KVM_CAP_ADJUST_CLOCK:
1546 case KVM_CAP_VCPU_EVENTS: 1546 case KVM_CAP_VCPU_EVENTS:
1547 case KVM_CAP_HYPERV: 1547 case KVM_CAP_HYPERV:
1548 case KVM_CAP_HYPERV_VAPIC: 1548 case KVM_CAP_HYPERV_VAPIC:
1549 case KVM_CAP_HYPERV_SPIN: 1549 case KVM_CAP_HYPERV_SPIN:
1550 case KVM_CAP_PCI_SEGMENT: 1550 case KVM_CAP_PCI_SEGMENT:
1551 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1551 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1552 r = 1; 1552 r = 1;
1553 break; 1553 break;
1554 case KVM_CAP_COALESCED_MMIO: 1554 case KVM_CAP_COALESCED_MMIO:
1555 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 1555 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
1556 break; 1556 break;
1557 case KVM_CAP_VAPIC: 1557 case KVM_CAP_VAPIC:
1558 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 1558 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
1559 break; 1559 break;
1560 case KVM_CAP_NR_VCPUS: 1560 case KVM_CAP_NR_VCPUS:
1561 r = KVM_MAX_VCPUS; 1561 r = KVM_MAX_VCPUS;
1562 break; 1562 break;
1563 case KVM_CAP_NR_MEMSLOTS: 1563 case KVM_CAP_NR_MEMSLOTS:
1564 r = KVM_MEMORY_SLOTS; 1564 r = KVM_MEMORY_SLOTS;
1565 break; 1565 break;
1566 case KVM_CAP_PV_MMU: /* obsolete */ 1566 case KVM_CAP_PV_MMU: /* obsolete */
1567 r = 0; 1567 r = 0;
1568 break; 1568 break;
1569 case KVM_CAP_IOMMU: 1569 case KVM_CAP_IOMMU:
1570 r = iommu_found(); 1570 r = iommu_found();
1571 break; 1571 break;
1572 case KVM_CAP_MCE: 1572 case KVM_CAP_MCE:
1573 r = KVM_MAX_MCE_BANKS; 1573 r = KVM_MAX_MCE_BANKS;
1574 break; 1574 break;
1575 default: 1575 default:
1576 r = 0; 1576 r = 0;
1577 break; 1577 break;
1578 } 1578 }
1579 return r; 1579 return r;
1580 1580
1581 } 1581 }
1582 1582
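kvm_dev_ioctl_check_extension() is reached from the KVM_CHECK_EXTENSION ioctl, and for several capabilities the numeric return value carries information beyond supported/unsupported (for example KVM_CAP_NR_VCPUS reports KVM_MAX_VCPUS and KVM_CAP_MCE reports the number of emulated MCE banks). A minimal userspace probe, offered only as a sketch rather than a complete tool, could be:

/* Illustrative userspace sketch: query a few capabilities of /dev/kvm. */
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (kvm_fd < 0)
		return 1;

	/* 0 means "not supported"; other values are capability-specific. */
	printf("irqchip:   %d\n", ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP));
	printf("max vcpus: %d\n", ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS));
	printf("mce banks: %d\n", ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE));
	return 0;
}
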
1583 long kvm_arch_dev_ioctl(struct file *filp, 1583 long kvm_arch_dev_ioctl(struct file *filp,
1584 unsigned int ioctl, unsigned long arg) 1584 unsigned int ioctl, unsigned long arg)
1585 { 1585 {
1586 void __user *argp = (void __user *)arg; 1586 void __user *argp = (void __user *)arg;
1587 long r; 1587 long r;
1588 1588
1589 switch (ioctl) { 1589 switch (ioctl) {
1590 case KVM_GET_MSR_INDEX_LIST: { 1590 case KVM_GET_MSR_INDEX_LIST: {
1591 struct kvm_msr_list __user *user_msr_list = argp; 1591 struct kvm_msr_list __user *user_msr_list = argp;
1592 struct kvm_msr_list msr_list; 1592 struct kvm_msr_list msr_list;
1593 unsigned n; 1593 unsigned n;
1594 1594
1595 r = -EFAULT; 1595 r = -EFAULT;
1596 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) 1596 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
1597 goto out; 1597 goto out;
1598 n = msr_list.nmsrs; 1598 n = msr_list.nmsrs;
1599 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); 1599 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
1600 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) 1600 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1601 goto out; 1601 goto out;
1602 r = -E2BIG; 1602 r = -E2BIG;
1603 if (n < msr_list.nmsrs) 1603 if (n < msr_list.nmsrs)
1604 goto out; 1604 goto out;
1605 r = -EFAULT; 1605 r = -EFAULT;
1606 if (copy_to_user(user_msr_list->indices, &msrs_to_save, 1606 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1607 num_msrs_to_save * sizeof(u32))) 1607 num_msrs_to_save * sizeof(u32)))
1608 goto out; 1608 goto out;
1609 if (copy_to_user(user_msr_list->indices + num_msrs_to_save, 1609 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
1610 &emulated_msrs, 1610 &emulated_msrs,
1611 ARRAY_SIZE(emulated_msrs) * sizeof(u32))) 1611 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
1612 goto out; 1612 goto out;
1613 r = 0; 1613 r = 0;
1614 break; 1614 break;
1615 } 1615 }
1616 case KVM_GET_SUPPORTED_CPUID: { 1616 case KVM_GET_SUPPORTED_CPUID: {
1617 struct kvm_cpuid2 __user *cpuid_arg = argp; 1617 struct kvm_cpuid2 __user *cpuid_arg = argp;
1618 struct kvm_cpuid2 cpuid; 1618 struct kvm_cpuid2 cpuid;
1619 1619
1620 r = -EFAULT; 1620 r = -EFAULT;
1621 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 1621 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
1622 goto out; 1622 goto out;
1623 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, 1623 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
1624 cpuid_arg->entries); 1624 cpuid_arg->entries);
1625 if (r) 1625 if (r)
1626 goto out; 1626 goto out;
1627 1627
1628 r = -EFAULT; 1628 r = -EFAULT;
1629 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) 1629 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
1630 goto out; 1630 goto out;
1631 r = 0; 1631 r = 0;
1632 break; 1632 break;
1633 } 1633 }
1634 case KVM_X86_GET_MCE_CAP_SUPPORTED: { 1634 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
1635 u64 mce_cap; 1635 u64 mce_cap;
1636 1636
1637 mce_cap = KVM_MCE_CAP_SUPPORTED; 1637 mce_cap = KVM_MCE_CAP_SUPPORTED;
1638 r = -EFAULT; 1638 r = -EFAULT;
1639 if (copy_to_user(argp, &mce_cap, sizeof mce_cap)) 1639 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
1640 goto out; 1640 goto out;
1641 r = 0; 1641 r = 0;
1642 break; 1642 break;
1643 } 1643 }
1644 default: 1644 default:
1645 r = -EINVAL; 1645 r = -EINVAL;
1646 } 1646 }
1647 out: 1647 out:
1648 return r; 1648 return r;
1649 } 1649 }
1650 1650
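The KVM_GET_MSR_INDEX_LIST branch above supports the usual probe-then-allocate pattern: when userspace passes too small an nmsrs, the kernel writes back the required count and fails with E2BIG, so a second call with a properly sized buffer succeeds. A hypothetical userspace sketch of that idiom follows; kvm_fd is assumed to be an open /dev/kvm descriptor and the helper name is illustrative.

/* Illustrative userspace sketch of the two-call KVM_GET_MSR_INDEX_LIST idiom. */
#include <errno.h>
#include <err.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static void dump_msr_indices(int kvm_fd)
{
	struct kvm_msr_list probe = { .nmsrs = 0 };
	struct kvm_msr_list *list;
	__u32 i;

	/* First call: deliberately undersized; the kernel fills in nmsrs. */
	if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &probe) < 0 && errno != E2BIG)
		err(1, "KVM_GET_MSR_INDEX_LIST (probe)");

	list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
	if (!list)
		err(1, "calloc");
	list->nmsrs = probe.nmsrs;

	/* Second call: large enough, indices[] is now filled in. */
	if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list) < 0)
		err(1, "KVM_GET_MSR_INDEX_LIST");

	for (i = 0; i < list->nmsrs; i++)
		printf("MSR 0x%x\n", list->indices[i]);
	free(list);
}
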
1651 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 1651 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1652 { 1652 {
1653 kvm_x86_ops->vcpu_load(vcpu, cpu); 1653 kvm_x86_ops->vcpu_load(vcpu, cpu);
1654 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { 1654 if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
1655 unsigned long khz = cpufreq_quick_get(cpu); 1655 unsigned long khz = cpufreq_quick_get(cpu);
1656 if (!khz) 1656 if (!khz)
1657 khz = tsc_khz; 1657 khz = tsc_khz;
1658 per_cpu(cpu_tsc_khz, cpu) = khz; 1658 per_cpu(cpu_tsc_khz, cpu) = khz;
1659 } 1659 }
1660 kvm_request_guest_time_update(vcpu); 1660 kvm_request_guest_time_update(vcpu);
1661 } 1661 }
1662 1662
1663 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1663 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1664 { 1664 {
1665 kvm_put_guest_fpu(vcpu); 1665 kvm_put_guest_fpu(vcpu);
1666 kvm_x86_ops->vcpu_put(vcpu); 1666 kvm_x86_ops->vcpu_put(vcpu);
1667 } 1667 }
1668 1668
1669 static int is_efer_nx(void) 1669 static int is_efer_nx(void)
1670 { 1670 {
1671 unsigned long long efer = 0; 1671 unsigned long long efer = 0;
1672 1672
1673 rdmsrl_safe(MSR_EFER, &efer); 1673 rdmsrl_safe(MSR_EFER, &efer);
1674 return efer & EFER_NX; 1674 return efer & EFER_NX;
1675 } 1675 }
1676 1676
1677 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) 1677 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
1678 { 1678 {
1679 int i; 1679 int i;
1680 struct kvm_cpuid_entry2 *e, *entry; 1680 struct kvm_cpuid_entry2 *e, *entry;
1681 1681
1682 entry = NULL; 1682 entry = NULL;
1683 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 1683 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
1684 e = &vcpu->arch.cpuid_entries[i]; 1684 e = &vcpu->arch.cpuid_entries[i];
1685 if (e->function == 0x80000001) { 1685 if (e->function == 0x80000001) {
1686 entry = e; 1686 entry = e;
1687 break; 1687 break;
1688 } 1688 }
1689 } 1689 }
1690 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { 1690 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
1691 entry->edx &= ~(1 << 20); 1691 entry->edx &= ~(1 << 20);
1692 printk(KERN_INFO "kvm: guest NX capability removed\n"); 1692 printk(KERN_INFO "kvm: guest NX capability removed\n");
1693 } 1693 }
1694 } 1694 }
1695 1695
1696 /* when an old userspace process fills a new kernel module */ 1696 /* when an old userspace process fills a new kernel module */
1697 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, 1697 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1698 struct kvm_cpuid *cpuid, 1698 struct kvm_cpuid *cpuid,
1699 struct kvm_cpuid_entry __user *entries) 1699 struct kvm_cpuid_entry __user *entries)
1700 { 1700 {
1701 int r, i; 1701 int r, i;
1702 struct kvm_cpuid_entry *cpuid_entries; 1702 struct kvm_cpuid_entry *cpuid_entries;
1703 1703
1704 r = -E2BIG; 1704 r = -E2BIG;
1705 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 1705 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1706 goto out; 1706 goto out;
1707 r = -ENOMEM; 1707 r = -ENOMEM;
1708 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); 1708 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
1709 if (!cpuid_entries) 1709 if (!cpuid_entries)
1710 goto out; 1710 goto out;
1711 r = -EFAULT; 1711 r = -EFAULT;
1712 if (copy_from_user(cpuid_entries, entries, 1712 if (copy_from_user(cpuid_entries, entries,
1713 cpuid->nent * sizeof(struct kvm_cpuid_entry))) 1713 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
1714 goto out_free; 1714 goto out_free;
1715 for (i = 0; i < cpuid->nent; i++) { 1715 for (i = 0; i < cpuid->nent; i++) {
1716 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; 1716 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
1717 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; 1717 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
1718 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; 1718 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
1719 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; 1719 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
1720 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx; 1720 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
1721 vcpu->arch.cpuid_entries[i].index = 0; 1721 vcpu->arch.cpuid_entries[i].index = 0;
1722 vcpu->arch.cpuid_entries[i].flags = 0; 1722 vcpu->arch.cpuid_entries[i].flags = 0;
1723 vcpu->arch.cpuid_entries[i].padding[0] = 0; 1723 vcpu->arch.cpuid_entries[i].padding[0] = 0;
1724 vcpu->arch.cpuid_entries[i].padding[1] = 0; 1724 vcpu->arch.cpuid_entries[i].padding[1] = 0;
1725 vcpu->arch.cpuid_entries[i].padding[2] = 0; 1725 vcpu->arch.cpuid_entries[i].padding[2] = 0;
1726 } 1726 }
1727 vcpu->arch.cpuid_nent = cpuid->nent; 1727 vcpu->arch.cpuid_nent = cpuid->nent;
1728 cpuid_fix_nx_cap(vcpu); 1728 cpuid_fix_nx_cap(vcpu);
1729 r = 0; 1729 r = 0;
1730 kvm_apic_set_version(vcpu); 1730 kvm_apic_set_version(vcpu);
1731 kvm_x86_ops->cpuid_update(vcpu); 1731 kvm_x86_ops->cpuid_update(vcpu);
1732 1732
1733 out_free: 1733 out_free:
1734 vfree(cpuid_entries); 1734 vfree(cpuid_entries);
1735 out: 1735 out:
1736 return r; 1736 return r;
1737 } 1737 }
1738 1738
1739 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, 1739 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1740 struct kvm_cpuid2 *cpuid, 1740 struct kvm_cpuid2 *cpuid,
1741 struct kvm_cpuid_entry2 __user *entries) 1741 struct kvm_cpuid_entry2 __user *entries)
1742 { 1742 {
1743 int r; 1743 int r;
1744 1744
1745 r = -E2BIG; 1745 r = -E2BIG;
1746 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 1746 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1747 goto out; 1747 goto out;
1748 r = -EFAULT; 1748 r = -EFAULT;
1749 if (copy_from_user(&vcpu->arch.cpuid_entries, entries, 1749 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
1750 cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1750 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
1751 goto out; 1751 goto out;
1752 vcpu->arch.cpuid_nent = cpuid->nent; 1752 vcpu->arch.cpuid_nent = cpuid->nent;
1753 kvm_apic_set_version(vcpu); 1753 kvm_apic_set_version(vcpu);
1754 kvm_x86_ops->cpuid_update(vcpu); 1754 kvm_x86_ops->cpuid_update(vcpu);
1755 return 0; 1755 return 0;
1756 1756
1757 out: 1757 out:
1758 return r; 1758 return r;
1759 } 1759 }
1760 1760
1761 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, 1761 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1762 struct kvm_cpuid2 *cpuid, 1762 struct kvm_cpuid2 *cpuid,
1763 struct kvm_cpuid_entry2 __user *entries) 1763 struct kvm_cpuid_entry2 __user *entries)
1764 { 1764 {
1765 int r; 1765 int r;
1766 1766
1767 r = -E2BIG; 1767 r = -E2BIG;
1768 if (cpuid->nent < vcpu->arch.cpuid_nent) 1768 if (cpuid->nent < vcpu->arch.cpuid_nent)
1769 goto out; 1769 goto out;
1770 r = -EFAULT; 1770 r = -EFAULT;
1771 if (copy_to_user(entries, &vcpu->arch.cpuid_entries, 1771 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
1772 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) 1772 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
1773 goto out; 1773 goto out;
1774 return 0; 1774 return 0;
1775 1775
1776 out: 1776 out:
1777 cpuid->nent = vcpu->arch.cpuid_nent; 1777 cpuid->nent = vcpu->arch.cpuid_nent;
1778 return r; 1778 return r;
1779 } 1779 }
1780 1780
1781 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1781 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1782 u32 index) 1782 u32 index)
1783 { 1783 {
1784 entry->function = function; 1784 entry->function = function;
1785 entry->index = index; 1785 entry->index = index;
1786 cpuid_count(entry->function, entry->index, 1786 cpuid_count(entry->function, entry->index,
1787 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); 1787 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
1788 entry->flags = 0; 1788 entry->flags = 0;
1789 } 1789 }
1790 1790
1791 #define F(x) bit(X86_FEATURE_##x) 1791 #define F(x) bit(X86_FEATURE_##x)
1792 1792
1793 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 1793 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1794 u32 index, int *nent, int maxnent) 1794 u32 index, int *nent, int maxnent)
1795 { 1795 {
1796 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1796 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1797 #ifdef CONFIG_X86_64 1797 #ifdef CONFIG_X86_64
1798 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) 1798 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
1799 ? F(GBPAGES) : 0; 1799 ? F(GBPAGES) : 0;
1800 unsigned f_lm = F(LM); 1800 unsigned f_lm = F(LM);
1801 #else 1801 #else
1802 unsigned f_gbpages = 0; 1802 unsigned f_gbpages = 0;
1803 unsigned f_lm = 0; 1803 unsigned f_lm = 0;
1804 #endif 1804 #endif
1805 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; 1805 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
1806 1806
1807 /* cpuid 1.edx */ 1807 /* cpuid 1.edx */
1808 const u32 kvm_supported_word0_x86_features = 1808 const u32 kvm_supported_word0_x86_features =
1809 F(FPU) | F(VME) | F(DE) | F(PSE) | 1809 F(FPU) | F(VME) | F(DE) | F(PSE) |
1810 F(TSC) | F(MSR) | F(PAE) | F(MCE) | 1810 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
1811 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | 1811 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
1812 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1812 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1813 F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | 1813 F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
1814 0 /* Reserved, DS, ACPI */ | F(MMX) | 1814 0 /* Reserved, DS, ACPI */ | F(MMX) |
1815 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | 1815 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
1816 0 /* HTT, TM, Reserved, PBE */; 1816 0 /* HTT, TM, Reserved, PBE */;
1817 /* cpuid 0x80000001.edx */ 1817 /* cpuid 0x80000001.edx */
1818 const u32 kvm_supported_word1_x86_features = 1818 const u32 kvm_supported_word1_x86_features =
1819 F(FPU) | F(VME) | F(DE) | F(PSE) | 1819 F(FPU) | F(VME) | F(DE) | F(PSE) |
1820 F(TSC) | F(MSR) | F(PAE) | F(MCE) | 1820 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
1821 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | 1821 F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
1822 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1822 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1823 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1823 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1824 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1824 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1825 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | 1825 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
1826 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1826 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1827 /* cpuid 1.ecx */ 1827 /* cpuid 1.ecx */
1828 const u32 kvm_supported_word4_x86_features = 1828 const u32 kvm_supported_word4_x86_features =
1829 F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | 1829 F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ |
1830 0 /* DS-CPL, VMX, SMX, EST */ | 1830 0 /* DS-CPL, VMX, SMX, EST */ |
1831 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | 1831 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
1832 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1832 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1833 0 /* Reserved, DCA */ | F(XMM4_1) | 1833 0 /* Reserved, DCA */ | F(XMM4_1) |
1834 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1834 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1835 0 /* Reserved, XSAVE, OSXSAVE */; 1835 0 /* Reserved, XSAVE, OSXSAVE */;
1836 /* cpuid 0x80000001.ecx */ 1836 /* cpuid 0x80000001.ecx */
1837 const u32 kvm_supported_word6_x86_features = 1837 const u32 kvm_supported_word6_x86_features =
1838 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | 1838 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
1839 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 1839 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
1840 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 1840 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
1841 0 /* SKINIT */ | 0 /* WDT */; 1841 0 /* SKINIT */ | 0 /* WDT */;
1842 1842
1843 /* all calls to cpuid_count() should be made on the same cpu */ 1843 /* all calls to cpuid_count() should be made on the same cpu */
1844 get_cpu(); 1844 get_cpu();
1845 do_cpuid_1_ent(entry, function, index); 1845 do_cpuid_1_ent(entry, function, index);
1846 ++*nent; 1846 ++*nent;
1847 1847
1848 switch (function) { 1848 switch (function) {
1849 case 0: 1849 case 0:
1850 entry->eax = min(entry->eax, (u32)0xb); 1850 entry->eax = min(entry->eax, (u32)0xb);
1851 break; 1851 break;
1852 case 1: 1852 case 1:
1853 entry->edx &= kvm_supported_word0_x86_features; 1853 entry->edx &= kvm_supported_word0_x86_features;
1854 entry->ecx &= kvm_supported_word4_x86_features; 1854 entry->ecx &= kvm_supported_word4_x86_features;
1855 /* we support x2apic emulation even if host does not support 1855 /* we support x2apic emulation even if host does not support
1856 * it since we emulate x2apic in software */ 1856 * it since we emulate x2apic in software */
1857 entry->ecx |= F(X2APIC); 1857 entry->ecx |= F(X2APIC);
1858 break; 1858 break;
1859 /* function 2 entries are STATEFUL. That is, repeated cpuid commands 1859 /* function 2 entries are STATEFUL. That is, repeated cpuid commands
1860 * may return different values. This forces us to get_cpu() before 1860 * may return different values. This forces us to get_cpu() before
1861 * issuing the first command, and also to emulate this annoying behavior 1861 * issuing the first command, and also to emulate this annoying behavior
1862 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ 1862 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
1863 case 2: { 1863 case 2: {
1864 int t, times = entry->eax & 0xff; 1864 int t, times = entry->eax & 0xff;
1865 1865
1866 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1866 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1867 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 1867 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
1868 for (t = 1; t < times && *nent < maxnent; ++t) { 1868 for (t = 1; t < times && *nent < maxnent; ++t) {
1869 do_cpuid_1_ent(&entry[t], function, 0); 1869 do_cpuid_1_ent(&entry[t], function, 0);
1870 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; 1870 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
1871 ++*nent; 1871 ++*nent;
1872 } 1872 }
1873 break; 1873 break;
1874 } 1874 }
1875 /* functions 4 and 0xb have an additional index. */ 1875 /* functions 4 and 0xb have an additional index. */
1876 case 4: { 1876 case 4: {
1877 int i, cache_type; 1877 int i, cache_type;
1878 1878
1879 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1879 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1880 /* read more entries until cache_type is zero */ 1880 /* read more entries until cache_type is zero */
1881 for (i = 1; *nent < maxnent; ++i) { 1881 for (i = 1; *nent < maxnent; ++i) {
1882 cache_type = entry[i - 1].eax & 0x1f; 1882 cache_type = entry[i - 1].eax & 0x1f;
1883 if (!cache_type) 1883 if (!cache_type)
1884 break; 1884 break;
1885 do_cpuid_1_ent(&entry[i], function, i); 1885 do_cpuid_1_ent(&entry[i], function, i);
1886 entry[i].flags |= 1886 entry[i].flags |=
1887 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1887 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1888 ++*nent; 1888 ++*nent;
1889 } 1889 }
1890 break; 1890 break;
1891 } 1891 }
1892 case 0xb: { 1892 case 0xb: {
1893 int i, level_type; 1893 int i, level_type;
1894 1894
1895 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1895 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1896 /* read more entries until level_type is zero */ 1896 /* read more entries until level_type is zero */
1897 for (i = 1; *nent < maxnent; ++i) { 1897 for (i = 1; *nent < maxnent; ++i) {
1898 level_type = entry[i - 1].ecx & 0xff00; 1898 level_type = entry[i - 1].ecx & 0xff00;
1899 if (!level_type) 1899 if (!level_type)
1900 break; 1900 break;
1901 do_cpuid_1_ent(&entry[i], function, i); 1901 do_cpuid_1_ent(&entry[i], function, i);
1902 entry[i].flags |= 1902 entry[i].flags |=
1903 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 1903 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1904 ++*nent; 1904 ++*nent;
1905 } 1905 }
1906 break; 1906 break;
1907 } 1907 }
1908 case 0x80000000: 1908 case 0x80000000:
1909 entry->eax = min(entry->eax, 0x8000001a); 1909 entry->eax = min(entry->eax, 0x8000001a);
1910 break; 1910 break;
1911 case 0x80000001: 1911 case 0x80000001:
1912 entry->edx &= kvm_supported_word1_x86_features; 1912 entry->edx &= kvm_supported_word1_x86_features;
1913 entry->ecx &= kvm_supported_word6_x86_features; 1913 entry->ecx &= kvm_supported_word6_x86_features;
1914 break; 1914 break;
1915 } 1915 }
1916 put_cpu(); 1916 put_cpu();
1917 } 1917 }
1918 1918
1919 #undef F 1919 #undef F
1920 1920
1921 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, 1921 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1922 struct kvm_cpuid_entry2 __user *entries) 1922 struct kvm_cpuid_entry2 __user *entries)
1923 { 1923 {
1924 struct kvm_cpuid_entry2 *cpuid_entries; 1924 struct kvm_cpuid_entry2 *cpuid_entries;
1925 int limit, nent = 0, r = -E2BIG; 1925 int limit, nent = 0, r = -E2BIG;
1926 u32 func; 1926 u32 func;
1927 1927
1928 if (cpuid->nent < 1) 1928 if (cpuid->nent < 1)
1929 goto out; 1929 goto out;
1930 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) 1930 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
1931 cpuid->nent = KVM_MAX_CPUID_ENTRIES; 1931 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
1932 r = -ENOMEM; 1932 r = -ENOMEM;
1933 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); 1933 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
1934 if (!cpuid_entries) 1934 if (!cpuid_entries)
1935 goto out; 1935 goto out;
1936 1936
1937 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); 1937 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
1938 limit = cpuid_entries[0].eax; 1938 limit = cpuid_entries[0].eax;
1939 for (func = 1; func <= limit && nent < cpuid->nent; ++func) 1939 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
1940 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1940 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1941 &nent, cpuid->nent); 1941 &nent, cpuid->nent);
1942 r = -E2BIG; 1942 r = -E2BIG;
1943 if (nent >= cpuid->nent) 1943 if (nent >= cpuid->nent)
1944 goto out_free; 1944 goto out_free;
1945 1945
1946 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); 1946 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
1947 limit = cpuid_entries[nent - 1].eax; 1947 limit = cpuid_entries[nent - 1].eax;
1948 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 1948 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
1949 do_cpuid_ent(&cpuid_entries[nent], func, 0, 1949 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1950 &nent, cpuid->nent); 1950 &nent, cpuid->nent);
1951 r = -E2BIG; 1951 r = -E2BIG;
1952 if (nent >= cpuid->nent) 1952 if (nent >= cpuid->nent)
1953 goto out_free; 1953 goto out_free;
1954 1954
1955 r = -EFAULT; 1955 r = -EFAULT;
1956 if (copy_to_user(entries, cpuid_entries, 1956 if (copy_to_user(entries, cpuid_entries,
1957 nent * sizeof(struct kvm_cpuid_entry2))) 1957 nent * sizeof(struct kvm_cpuid_entry2)))
1958 goto out_free; 1958 goto out_free;
1959 cpuid->nent = nent; 1959 cpuid->nent = nent;
1960 r = 0; 1960 r = 0;
1961 1961
1962 out_free: 1962 out_free:
1963 vfree(cpuid_entries); 1963 vfree(cpuid_entries);
1964 out: 1964 out:
1965 return r; 1965 return r;
1966 } 1966 }
1967 1967
1968 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 1968 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
1969 struct kvm_lapic_state *s) 1969 struct kvm_lapic_state *s)
1970 { 1970 {
1971 vcpu_load(vcpu); 1971 vcpu_load(vcpu);
1972 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); 1972 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
1973 vcpu_put(vcpu); 1973 vcpu_put(vcpu);
1974 1974
1975 return 0; 1975 return 0;
1976 } 1976 }
1977 1977
1978 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, 1978 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
1979 struct kvm_lapic_state *s) 1979 struct kvm_lapic_state *s)
1980 { 1980 {
1981 vcpu_load(vcpu); 1981 vcpu_load(vcpu);
1982 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); 1982 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
1983 kvm_apic_post_state_restore(vcpu); 1983 kvm_apic_post_state_restore(vcpu);
1984 update_cr8_intercept(vcpu); 1984 update_cr8_intercept(vcpu);
1985 vcpu_put(vcpu); 1985 vcpu_put(vcpu);
1986 1986
1987 return 0; 1987 return 0;
1988 } 1988 }
1989 1989
1990 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, 1990 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
1991 struct kvm_interrupt *irq) 1991 struct kvm_interrupt *irq)
1992 { 1992 {
1993 if (irq->irq < 0 || irq->irq >= 256) 1993 if (irq->irq < 0 || irq->irq >= 256)
1994 return -EINVAL; 1994 return -EINVAL;
1995 if (irqchip_in_kernel(vcpu->kvm)) 1995 if (irqchip_in_kernel(vcpu->kvm))
1996 return -ENXIO; 1996 return -ENXIO;
1997 vcpu_load(vcpu); 1997 vcpu_load(vcpu);
1998 1998
1999 kvm_queue_interrupt(vcpu, irq->irq, false); 1999 kvm_queue_interrupt(vcpu, irq->irq, false);
2000 2000
2001 vcpu_put(vcpu); 2001 vcpu_put(vcpu);
2002 2002
2003 return 0; 2003 return 0;
2004 } 2004 }
2005 2005
2006 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) 2006 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2007 { 2007 {
2008 vcpu_load(vcpu); 2008 vcpu_load(vcpu);
2009 kvm_inject_nmi(vcpu); 2009 kvm_inject_nmi(vcpu);
2010 vcpu_put(vcpu); 2010 vcpu_put(vcpu);
2011 2011
2012 return 0; 2012 return 0;
2013 } 2013 }
2014 2014
2015 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, 2015 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2016 struct kvm_tpr_access_ctl *tac) 2016 struct kvm_tpr_access_ctl *tac)
2017 { 2017 {
2018 if (tac->flags) 2018 if (tac->flags)
2019 return -EINVAL; 2019 return -EINVAL;
2020 vcpu->arch.tpr_access_reporting = !!tac->enabled; 2020 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2021 return 0; 2021 return 0;
2022 } 2022 }
2023 2023
2024 static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, 2024 static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2025 u64 mcg_cap) 2025 u64 mcg_cap)
2026 { 2026 {
2027 int r; 2027 int r;
2028 unsigned bank_num = mcg_cap & 0xff, bank; 2028 unsigned bank_num = mcg_cap & 0xff, bank;
2029 2029
2030 r = -EINVAL; 2030 r = -EINVAL;
2031 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2031 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2032 goto out; 2032 goto out;
2033 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000)) 2033 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2034 goto out; 2034 goto out;
2035 r = 0; 2035 r = 0;
2036 vcpu->arch.mcg_cap = mcg_cap; 2036 vcpu->arch.mcg_cap = mcg_cap;
2037 /* Init IA32_MCG_CTL to all 1s */ 2037 /* Init IA32_MCG_CTL to all 1s */
2038 if (mcg_cap & MCG_CTL_P) 2038 if (mcg_cap & MCG_CTL_P)
2039 vcpu->arch.mcg_ctl = ~(u64)0; 2039 vcpu->arch.mcg_ctl = ~(u64)0;
2040 /* Init IA32_MCi_CTL to all 1s */ 2040 /* Init IA32_MCi_CTL to all 1s */
2041 for (bank = 0; bank < bank_num; bank++) 2041 for (bank = 0; bank < bank_num; bank++)
2042 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2042 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2043 out: 2043 out:
2044 return r; 2044 return r;
2045 } 2045 }
2046 2046
2047 static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, 2047 static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2048 struct kvm_x86_mce *mce) 2048 struct kvm_x86_mce *mce)
2049 { 2049 {
2050 u64 mcg_cap = vcpu->arch.mcg_cap; 2050 u64 mcg_cap = vcpu->arch.mcg_cap;
2051 unsigned bank_num = mcg_cap & 0xff; 2051 unsigned bank_num = mcg_cap & 0xff;
2052 u64 *banks = vcpu->arch.mce_banks; 2052 u64 *banks = vcpu->arch.mce_banks;
2053 2053
2054 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL)) 2054 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2055 return -EINVAL; 2055 return -EINVAL;
2056 /* 2056 /*
2057 * if IA32_MCG_CTL is not all 1s, the uncorrected error 2057 * if IA32_MCG_CTL is not all 1s, the uncorrected error
2058 * reporting is disabled 2058 * reporting is disabled
2059 */ 2059 */
2060 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) && 2060 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2061 vcpu->arch.mcg_ctl != ~(u64)0) 2061 vcpu->arch.mcg_ctl != ~(u64)0)
2062 return 0; 2062 return 0;
2063 banks += 4 * mce->bank; 2063 banks += 4 * mce->bank;
2064 /* 2064 /*
2065 * if IA32_MCi_CTL is not all 1s, the uncorrected error 2065 * if IA32_MCi_CTL is not all 1s, the uncorrected error
2066 * reporting is disabled for the bank 2066 * reporting is disabled for the bank
2067 */ 2067 */
2068 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0) 2068 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2069 return 0; 2069 return 0;
2070 if (mce->status & MCI_STATUS_UC) { 2070 if (mce->status & MCI_STATUS_UC) {
2071 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2071 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2072 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { 2072 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2073 printk(KERN_DEBUG "kvm: set_mce: " 2073 printk(KERN_DEBUG "kvm: set_mce: "
2074 "injects mce exception while " 2074 "injects mce exception while "
2075 "previous one is in progress!\n"); 2075 "previous one is in progress!\n");
2076 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); 2076 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
2077 return 0; 2077 return 0;
2078 } 2078 }
2079 if (banks[1] & MCI_STATUS_VAL) 2079 if (banks[1] & MCI_STATUS_VAL)
2080 mce->status |= MCI_STATUS_OVER; 2080 mce->status |= MCI_STATUS_OVER;
2081 banks[2] = mce->addr; 2081 banks[2] = mce->addr;
2082 banks[3] = mce->misc; 2082 banks[3] = mce->misc;
2083 vcpu->arch.mcg_status = mce->mcg_status; 2083 vcpu->arch.mcg_status = mce->mcg_status;
2084 banks[1] = mce->status; 2084 banks[1] = mce->status;
2085 kvm_queue_exception(vcpu, MC_VECTOR); 2085 kvm_queue_exception(vcpu, MC_VECTOR);
2086 } else if (!(banks[1] & MCI_STATUS_VAL) 2086 } else if (!(banks[1] & MCI_STATUS_VAL)
2087 || !(banks[1] & MCI_STATUS_UC)) { 2087 || !(banks[1] & MCI_STATUS_UC)) {
2088 if (banks[1] & MCI_STATUS_VAL) 2088 if (banks[1] & MCI_STATUS_VAL)
2089 mce->status |= MCI_STATUS_OVER; 2089 mce->status |= MCI_STATUS_OVER;
2090 banks[2] = mce->addr; 2090 banks[2] = mce->addr;
2091 banks[3] = mce->misc; 2091 banks[3] = mce->misc;
2092 banks[1] = mce->status; 2092 banks[1] = mce->status;
2093 } else 2093 } else
2094 banks[1] |= MCI_STATUS_OVER; 2094 banks[1] |= MCI_STATUS_OVER;
2095 return 0; 2095 return 0;
2096 } 2096 }
2097 2097
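In the two functions above, each machine-check bank occupies four consecutive u64 slots of vcpu->arch.mce_banks, which is why set_mce advances the pointer by 4 * mce->bank and setup_mce initialises index bank*4. The slot meanings can be read off the assignments (banks[1] = status, banks[2] = addr, banks[3] = misc, with banks[0] initialised as IA32_MCi_CTL). The index names below are purely illustrative and not part of the kernel source; they only restate that layout:

	/* Illustrative index names only -- the kernel indexes the array directly. */
	enum {
		MCE_BANK_CTL    = 0,	/* IA32_MCi_CTL,    banks[0] */
		MCE_BANK_STATUS = 1,	/* IA32_MCi_STATUS, banks[1] */
		MCE_BANK_ADDR   = 2,	/* IA32_MCi_ADDR,   banks[2] */
		MCE_BANK_MISC   = 3,	/* IA32_MCi_MISC,   banks[3] */
	};
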
2098 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 2098 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2099 struct kvm_vcpu_events *events) 2099 struct kvm_vcpu_events *events)
2100 { 2100 {
2101 vcpu_load(vcpu); 2101 vcpu_load(vcpu);
2102 2102
2103 events->exception.injected = vcpu->arch.exception.pending; 2103 events->exception.injected = vcpu->arch.exception.pending;
2104 events->exception.nr = vcpu->arch.exception.nr; 2104 events->exception.nr = vcpu->arch.exception.nr;
2105 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2105 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2106 events->exception.error_code = vcpu->arch.exception.error_code; 2106 events->exception.error_code = vcpu->arch.exception.error_code;
2107 2107
2108 events->interrupt.injected = vcpu->arch.interrupt.pending; 2108 events->interrupt.injected = vcpu->arch.interrupt.pending;
2109 events->interrupt.nr = vcpu->arch.interrupt.nr; 2109 events->interrupt.nr = vcpu->arch.interrupt.nr;
2110 events->interrupt.soft = vcpu->arch.interrupt.soft; 2110 events->interrupt.soft = vcpu->arch.interrupt.soft;
2111 2111
2112 events->nmi.injected = vcpu->arch.nmi_injected; 2112 events->nmi.injected = vcpu->arch.nmi_injected;
2113 events->nmi.pending = vcpu->arch.nmi_pending; 2113 events->nmi.pending = vcpu->arch.nmi_pending;
2114 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); 2114 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2115 2115
2116 events->sipi_vector = vcpu->arch.sipi_vector; 2116 events->sipi_vector = vcpu->arch.sipi_vector;
2117 2117
2118 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2118 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2119 | KVM_VCPUEVENT_VALID_SIPI_VECTOR); 2119 | KVM_VCPUEVENT_VALID_SIPI_VECTOR);
2120 2120
2121 vcpu_put(vcpu); 2121 vcpu_put(vcpu);
2122 } 2122 }
2123 2123
2124 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, 2124 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2125 struct kvm_vcpu_events *events) 2125 struct kvm_vcpu_events *events)
2126 { 2126 {
2127 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING 2127 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2128 | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) 2128 | KVM_VCPUEVENT_VALID_SIPI_VECTOR))
2129 return -EINVAL; 2129 return -EINVAL;
2130 2130
2131 vcpu_load(vcpu); 2131 vcpu_load(vcpu);
2132 2132
2133 vcpu->arch.exception.pending = events->exception.injected; 2133 vcpu->arch.exception.pending = events->exception.injected;
2134 vcpu->arch.exception.nr = events->exception.nr; 2134 vcpu->arch.exception.nr = events->exception.nr;
2135 vcpu->arch.exception.has_error_code = events->exception.has_error_code; 2135 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2136 vcpu->arch.exception.error_code = events->exception.error_code; 2136 vcpu->arch.exception.error_code = events->exception.error_code;
2137 2137
2138 vcpu->arch.interrupt.pending = events->interrupt.injected; 2138 vcpu->arch.interrupt.pending = events->interrupt.injected;
2139 vcpu->arch.interrupt.nr = events->interrupt.nr; 2139 vcpu->arch.interrupt.nr = events->interrupt.nr;
2140 vcpu->arch.interrupt.soft = events->interrupt.soft; 2140 vcpu->arch.interrupt.soft = events->interrupt.soft;
2141 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 2141 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2142 kvm_pic_clear_isr_ack(vcpu->kvm); 2142 kvm_pic_clear_isr_ack(vcpu->kvm);
2143 2143
2144 vcpu->arch.nmi_injected = events->nmi.injected; 2144 vcpu->arch.nmi_injected = events->nmi.injected;
2145 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) 2145 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2146 vcpu->arch.nmi_pending = events->nmi.pending; 2146 vcpu->arch.nmi_pending = events->nmi.pending;
2147 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); 2147 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2148 2148
2149 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) 2149 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2150 vcpu->arch.sipi_vector = events->sipi_vector; 2150 vcpu->arch.sipi_vector = events->sipi_vector;
2151 2151
2152 vcpu_put(vcpu); 2152 vcpu_put(vcpu);
2153 2153
2154 return 0; 2154 return 0;
2155 } 2155 }
2156 2156
2157 long kvm_arch_vcpu_ioctl(struct file *filp, 2157 long kvm_arch_vcpu_ioctl(struct file *filp,
2158 unsigned int ioctl, unsigned long arg) 2158 unsigned int ioctl, unsigned long arg)
2159 { 2159 {
2160 struct kvm_vcpu *vcpu = filp->private_data; 2160 struct kvm_vcpu *vcpu = filp->private_data;
2161 void __user *argp = (void __user *)arg; 2161 void __user *argp = (void __user *)arg;
2162 int r; 2162 int r;
2163 struct kvm_lapic_state *lapic = NULL; 2163 struct kvm_lapic_state *lapic = NULL;
2164 2164
2165 switch (ioctl) { 2165 switch (ioctl) {
2166 case KVM_GET_LAPIC: { 2166 case KVM_GET_LAPIC: {
2167 r = -EINVAL; 2167 r = -EINVAL;
2168 if (!vcpu->arch.apic) 2168 if (!vcpu->arch.apic)
2169 goto out; 2169 goto out;
2170 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2170 lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2171 2171
2172 r = -ENOMEM; 2172 r = -ENOMEM;
2173 if (!lapic) 2173 if (!lapic)
2174 goto out; 2174 goto out;
2175 r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); 2175 r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
2176 if (r) 2176 if (r)
2177 goto out; 2177 goto out;
2178 r = -EFAULT; 2178 r = -EFAULT;
2179 if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) 2179 if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
2180 goto out; 2180 goto out;
2181 r = 0; 2181 r = 0;
2182 break; 2182 break;
2183 } 2183 }
2184 case KVM_SET_LAPIC: { 2184 case KVM_SET_LAPIC: {
2185 r = -EINVAL; 2185 r = -EINVAL;
2186 if (!vcpu->arch.apic) 2186 if (!vcpu->arch.apic)
2187 goto out; 2187 goto out;
2188 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); 2188 lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2189 r = -ENOMEM; 2189 r = -ENOMEM;
2190 if (!lapic) 2190 if (!lapic)
2191 goto out; 2191 goto out;
2192 r = -EFAULT; 2192 r = -EFAULT;
2193 if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) 2193 if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
2194 goto out; 2194 goto out;
2195 r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); 2195 r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
2196 if (r) 2196 if (r)
2197 goto out; 2197 goto out;
2198 r = 0; 2198 r = 0;
2199 break; 2199 break;
2200 } 2200 }
2201 case KVM_INTERRUPT: { 2201 case KVM_INTERRUPT: {
2202 struct kvm_interrupt irq; 2202 struct kvm_interrupt irq;
2203 2203
2204 r = -EFAULT; 2204 r = -EFAULT;
2205 if (copy_from_user(&irq, argp, sizeof irq)) 2205 if (copy_from_user(&irq, argp, sizeof irq))
2206 goto out; 2206 goto out;
2207 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 2207 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2208 if (r) 2208 if (r)
2209 goto out; 2209 goto out;
2210 r = 0; 2210 r = 0;
2211 break; 2211 break;
2212 } 2212 }
2213 case KVM_NMI: { 2213 case KVM_NMI: {
2214 r = kvm_vcpu_ioctl_nmi(vcpu); 2214 r = kvm_vcpu_ioctl_nmi(vcpu);
2215 if (r) 2215 if (r)
2216 goto out; 2216 goto out;
2217 r = 0; 2217 r = 0;
2218 break; 2218 break;
2219 } 2219 }
2220 case KVM_SET_CPUID: { 2220 case KVM_SET_CPUID: {
2221 struct kvm_cpuid __user *cpuid_arg = argp; 2221 struct kvm_cpuid __user *cpuid_arg = argp;
2222 struct kvm_cpuid cpuid; 2222 struct kvm_cpuid cpuid;
2223 2223
2224 r = -EFAULT; 2224 r = -EFAULT;
2225 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2225 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2226 goto out; 2226 goto out;
2227 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); 2227 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2228 if (r) 2228 if (r)
2229 goto out; 2229 goto out;
2230 break; 2230 break;
2231 } 2231 }
2232 case KVM_SET_CPUID2: { 2232 case KVM_SET_CPUID2: {
2233 struct kvm_cpuid2 __user *cpuid_arg = argp; 2233 struct kvm_cpuid2 __user *cpuid_arg = argp;
2234 struct kvm_cpuid2 cpuid; 2234 struct kvm_cpuid2 cpuid;
2235 2235
2236 r = -EFAULT; 2236 r = -EFAULT;
2237 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2237 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2238 goto out; 2238 goto out;
2239 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, 2239 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
2240 cpuid_arg->entries); 2240 cpuid_arg->entries);
2241 if (r) 2241 if (r)
2242 goto out; 2242 goto out;
2243 break; 2243 break;
2244 } 2244 }
2245 case KVM_GET_CPUID2: { 2245 case KVM_GET_CPUID2: {
2246 struct kvm_cpuid2 __user *cpuid_arg = argp; 2246 struct kvm_cpuid2 __user *cpuid_arg = argp;
2247 struct kvm_cpuid2 cpuid; 2247 struct kvm_cpuid2 cpuid;
2248 2248
2249 r = -EFAULT; 2249 r = -EFAULT;
2250 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) 2250 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2251 goto out; 2251 goto out;
2252 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, 2252 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
2253 cpuid_arg->entries); 2253 cpuid_arg->entries);
2254 if (r) 2254 if (r)
2255 goto out; 2255 goto out;
2256 r = -EFAULT; 2256 r = -EFAULT;
2257 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) 2257 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2258 goto out; 2258 goto out;
2259 r = 0; 2259 r = 0;
2260 break; 2260 break;
2261 } 2261 }
2262 case KVM_GET_MSRS: 2262 case KVM_GET_MSRS:
2263 r = msr_io(vcpu, argp, kvm_get_msr, 1); 2263 r = msr_io(vcpu, argp, kvm_get_msr, 1);
2264 break; 2264 break;
2265 case KVM_SET_MSRS: 2265 case KVM_SET_MSRS:
2266 r = msr_io(vcpu, argp, do_set_msr, 0); 2266 r = msr_io(vcpu, argp, do_set_msr, 0);
2267 break; 2267 break;
2268 case KVM_TPR_ACCESS_REPORTING: { 2268 case KVM_TPR_ACCESS_REPORTING: {
2269 struct kvm_tpr_access_ctl tac; 2269 struct kvm_tpr_access_ctl tac;
2270 2270
2271 r = -EFAULT; 2271 r = -EFAULT;
2272 if (copy_from_user(&tac, argp, sizeof tac)) 2272 if (copy_from_user(&tac, argp, sizeof tac))
2273 goto out; 2273 goto out;
2274 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac); 2274 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
2275 if (r) 2275 if (r)
2276 goto out; 2276 goto out;
2277 r = -EFAULT; 2277 r = -EFAULT;
2278 if (copy_to_user(argp, &tac, sizeof tac)) 2278 if (copy_to_user(argp, &tac, sizeof tac))
2279 goto out; 2279 goto out;
2280 r = 0; 2280 r = 0;
2281 break; 2281 break;
2282 }; 2282 };
2283 case KVM_SET_VAPIC_ADDR: { 2283 case KVM_SET_VAPIC_ADDR: {
2284 struct kvm_vapic_addr va; 2284 struct kvm_vapic_addr va;
2285 2285
2286 r = -EINVAL; 2286 r = -EINVAL;
2287 if (!irqchip_in_kernel(vcpu->kvm)) 2287 if (!irqchip_in_kernel(vcpu->kvm))
2288 goto out; 2288 goto out;
2289 r = -EFAULT; 2289 r = -EFAULT;
2290 if (copy_from_user(&va, argp, sizeof va)) 2290 if (copy_from_user(&va, argp, sizeof va))
2291 goto out; 2291 goto out;
2292 r = 0; 2292 r = 0;
2293 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); 2293 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
2294 break; 2294 break;
2295 } 2295 }
2296 case KVM_X86_SETUP_MCE: { 2296 case KVM_X86_SETUP_MCE: {
2297 u64 mcg_cap; 2297 u64 mcg_cap;
2298 2298
2299 r = -EFAULT; 2299 r = -EFAULT;
2300 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap)) 2300 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
2301 goto out; 2301 goto out;
2302 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap); 2302 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
2303 break; 2303 break;
2304 } 2304 }
2305 case KVM_X86_SET_MCE: { 2305 case KVM_X86_SET_MCE: {
2306 struct kvm_x86_mce mce; 2306 struct kvm_x86_mce mce;
2307 2307
2308 r = -EFAULT; 2308 r = -EFAULT;
2309 if (copy_from_user(&mce, argp, sizeof mce)) 2309 if (copy_from_user(&mce, argp, sizeof mce))
2310 goto out; 2310 goto out;
2311 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2311 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2312 break; 2312 break;
2313 } 2313 }
2314 case KVM_GET_VCPU_EVENTS: { 2314 case KVM_GET_VCPU_EVENTS: {
2315 struct kvm_vcpu_events events; 2315 struct kvm_vcpu_events events;
2316 2316
2317 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); 2317 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
2318 2318
2319 r = -EFAULT; 2319 r = -EFAULT;
2320 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) 2320 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
2321 break; 2321 break;
2322 r = 0; 2322 r = 0;
2323 break; 2323 break;
2324 } 2324 }
2325 case KVM_SET_VCPU_EVENTS: { 2325 case KVM_SET_VCPU_EVENTS: {
2326 struct kvm_vcpu_events events; 2326 struct kvm_vcpu_events events;
2327 2327
2328 r = -EFAULT; 2328 r = -EFAULT;
2329 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) 2329 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
2330 break; 2330 break;
2331 2331
2332 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2332 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2333 break; 2333 break;
2334 } 2334 }
2335 default: 2335 default:
2336 r = -EINVAL; 2336 r = -EINVAL;
2337 } 2337 }
2338 out: 2338 out:
2339 kfree(lapic); 2339 kfree(lapic);
2340 return r; 2340 return r;
2341 } 2341 }
2342 2342
2343 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) 2343 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
2344 { 2344 {
2345 int ret; 2345 int ret;
2346 2346
2347 if (addr > (unsigned int)(-3 * PAGE_SIZE)) 2347 if (addr > (unsigned int)(-3 * PAGE_SIZE))
2348 return -1; 2348 return -1;
2349 ret = kvm_x86_ops->set_tss_addr(kvm, addr); 2349 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
2350 return ret; 2350 return ret;
2351 } 2351 }
2352 2352
2353 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, 2353 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
2354 u64 ident_addr) 2354 u64 ident_addr)
2355 { 2355 {
2356 kvm->arch.ept_identity_map_addr = ident_addr; 2356 kvm->arch.ept_identity_map_addr = ident_addr;
2357 return 0; 2357 return 0;
2358 } 2358 }
2359 2359
2360 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, 2360 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
2361 u32 kvm_nr_mmu_pages) 2361 u32 kvm_nr_mmu_pages)
2362 { 2362 {
2363 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2363 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
2364 return -EINVAL; 2364 return -EINVAL;
2365 2365
2366 mutex_lock(&kvm->slots_lock); 2366 mutex_lock(&kvm->slots_lock);
2367 spin_lock(&kvm->mmu_lock); 2367 spin_lock(&kvm->mmu_lock);
2368 2368
2369 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2369 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
2370 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2370 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
2371 2371
2372 spin_unlock(&kvm->mmu_lock); 2372 spin_unlock(&kvm->mmu_lock);
2373 mutex_unlock(&kvm->slots_lock); 2373 mutex_unlock(&kvm->slots_lock);
2374 return 0; 2374 return 0;
2375 } 2375 }
2376 2376
2377 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) 2377 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2378 { 2378 {
2379 return kvm->arch.n_alloc_mmu_pages; 2379 return kvm->arch.n_alloc_mmu_pages;
2380 } 2380 }
2381 2381
2382 gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) 2382 gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2383 { 2383 {
2384 int i; 2384 int i;
2385 struct kvm_mem_alias *alias; 2385 struct kvm_mem_alias *alias;
2386 struct kvm_mem_aliases *aliases; 2386 struct kvm_mem_aliases *aliases;
2387 2387
2388 aliases = rcu_dereference(kvm->arch.aliases); 2388 aliases = rcu_dereference(kvm->arch.aliases);
2389 2389
2390 for (i = 0; i < aliases->naliases; ++i) { 2390 for (i = 0; i < aliases->naliases; ++i) {
2391 alias = &aliases->aliases[i]; 2391 alias = &aliases->aliases[i];
2392 if (alias->flags & KVM_ALIAS_INVALID) 2392 if (alias->flags & KVM_ALIAS_INVALID)
2393 continue; 2393 continue;
2394 if (gfn >= alias->base_gfn 2394 if (gfn >= alias->base_gfn
2395 && gfn < alias->base_gfn + alias->npages) 2395 && gfn < alias->base_gfn + alias->npages)
2396 return alias->target_gfn + gfn - alias->base_gfn; 2396 return alias->target_gfn + gfn - alias->base_gfn;
2397 } 2397 }
2398 return gfn; 2398 return gfn;
2399 } 2399 }
2400 2400
2401 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2401 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2402 { 2402 {
2403 int i; 2403 int i;
2404 struct kvm_mem_alias *alias; 2404 struct kvm_mem_alias *alias;
2405 struct kvm_mem_aliases *aliases; 2405 struct kvm_mem_aliases *aliases;
2406 2406
2407 aliases = rcu_dereference(kvm->arch.aliases); 2407 aliases = rcu_dereference(kvm->arch.aliases);
2408 2408
2409 for (i = 0; i < aliases->naliases; ++i) { 2409 for (i = 0; i < aliases->naliases; ++i) {
2410 alias = &aliases->aliases[i]; 2410 alias = &aliases->aliases[i];
2411 if (gfn >= alias->base_gfn 2411 if (gfn >= alias->base_gfn
2412 && gfn < alias->base_gfn + alias->npages) 2412 && gfn < alias->base_gfn + alias->npages)
2413 return alias->target_gfn + gfn - alias->base_gfn; 2413 return alias->target_gfn + gfn - alias->base_gfn;
2414 } 2414 }
2415 return gfn; 2415 return gfn;
2416 } 2416 }
2417 2417
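The two lookup helpers above walk the same alias table; unalias_gfn_instantiation additionally skips entries flagged KVM_ALIAS_INVALID. As a purely illustrative example of the arithmetic: for an alias with base_gfn = 0xa0, npages = 0x20 and target_gfn = 0xc00, a lookup of gfn 0xb0 falls inside the range and returns 0xc00 + 0xb0 - 0xa0 = 0xc10, while a gfn that matches no alias is returned unchanged.
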
2418 /* 2418 /*
2419 * Set a new alias region. Aliases map a portion of physical memory into 2419 * Set a new alias region. Aliases map a portion of physical memory into
2420 * another portion. This is useful for memory windows, for example the PC 2420 * another portion. This is useful for memory windows, for example the PC
2421 * VGA region. 2421 * VGA region.
2422 */ 2422 */
2423 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, 2423 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2424 struct kvm_memory_alias *alias) 2424 struct kvm_memory_alias *alias)
2425 { 2425 {
2426 int r, n; 2426 int r, n;
2427 struct kvm_mem_alias *p; 2427 struct kvm_mem_alias *p;
2428 struct kvm_mem_aliases *aliases, *old_aliases; 2428 struct kvm_mem_aliases *aliases, *old_aliases;
2429 2429
2430 r = -EINVAL; 2430 r = -EINVAL;
2431 /* General sanity checks */ 2431 /* General sanity checks */
2432 if (alias->memory_size & (PAGE_SIZE - 1)) 2432 if (alias->memory_size & (PAGE_SIZE - 1))
2433 goto out; 2433 goto out;
2434 if (alias->guest_phys_addr & (PAGE_SIZE - 1)) 2434 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
2435 goto out; 2435 goto out;
2436 if (alias->slot >= KVM_ALIAS_SLOTS) 2436 if (alias->slot >= KVM_ALIAS_SLOTS)
2437 goto out; 2437 goto out;
2438 if (alias->guest_phys_addr + alias->memory_size 2438 if (alias->guest_phys_addr + alias->memory_size
2439 < alias->guest_phys_addr) 2439 < alias->guest_phys_addr)
2440 goto out; 2440 goto out;
2441 if (alias->target_phys_addr + alias->memory_size 2441 if (alias->target_phys_addr + alias->memory_size
2442 < alias->target_phys_addr) 2442 < alias->target_phys_addr)
2443 goto out; 2443 goto out;
2444 2444
2445 r = -ENOMEM; 2445 r = -ENOMEM;
2446 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2446 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2447 if (!aliases) 2447 if (!aliases)
2448 goto out; 2448 goto out;
2449 2449
2450 mutex_lock(&kvm->slots_lock); 2450 mutex_lock(&kvm->slots_lock);
2451 2451
2452 /* invalidate any gfn reference in case of deletion/shrinking */ 2452 /* invalidate any gfn reference in case of deletion/shrinking */
2453 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2453 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2454 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; 2454 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2455 old_aliases = kvm->arch.aliases; 2455 old_aliases = kvm->arch.aliases;
2456 rcu_assign_pointer(kvm->arch.aliases, aliases); 2456 rcu_assign_pointer(kvm->arch.aliases, aliases);
2457 synchronize_srcu_expedited(&kvm->srcu); 2457 synchronize_srcu_expedited(&kvm->srcu);
2458 kvm_mmu_zap_all(kvm); 2458 kvm_mmu_zap_all(kvm);
2459 kfree(old_aliases); 2459 kfree(old_aliases);
2460 2460
2461 r = -ENOMEM; 2461 r = -ENOMEM;
2462 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 2462 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2463 if (!aliases) 2463 if (!aliases)
2464 goto out_unlock; 2464 goto out_unlock;
2465 2465
2466 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); 2466 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2467 2467
2468 p = &aliases->aliases[alias->slot]; 2468 p = &aliases->aliases[alias->slot];
2469 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2469 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2470 p->npages = alias->memory_size >> PAGE_SHIFT; 2470 p->npages = alias->memory_size >> PAGE_SHIFT;
2471 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2471 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2472 p->flags &= ~(KVM_ALIAS_INVALID); 2472 p->flags &= ~(KVM_ALIAS_INVALID);
2473 2473
2474 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2474 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2475 if (aliases->aliases[n - 1].npages) 2475 if (aliases->aliases[n - 1].npages)
2476 break; 2476 break;
2477 aliases->naliases = n; 2477 aliases->naliases = n;
2478 2478
2479 old_aliases = kvm->arch.aliases; 2479 old_aliases = kvm->arch.aliases;
2480 rcu_assign_pointer(kvm->arch.aliases, aliases); 2480 rcu_assign_pointer(kvm->arch.aliases, aliases);
2481 synchronize_srcu_expedited(&kvm->srcu); 2481 synchronize_srcu_expedited(&kvm->srcu);
2482 kfree(old_aliases); 2482 kfree(old_aliases);
2483 r = 0; 2483 r = 0;
2484 2484
2485 out_unlock: 2485 out_unlock:
2486 mutex_unlock(&kvm->slots_lock); 2486 mutex_unlock(&kvm->slots_lock);
2487 out: 2487 out:
2488 return r; 2488 return r;
2489 } 2489 }
2490 2490
2491 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 2491 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2492 { 2492 {
2493 int r; 2493 int r;
2494 2494
2495 r = 0; 2495 r = 0;
2496 switch (chip->chip_id) { 2496 switch (chip->chip_id) {
2497 case KVM_IRQCHIP_PIC_MASTER: 2497 case KVM_IRQCHIP_PIC_MASTER:
2498 memcpy(&chip->chip.pic, 2498 memcpy(&chip->chip.pic,
2499 &pic_irqchip(kvm)->pics[0], 2499 &pic_irqchip(kvm)->pics[0],
2500 sizeof(struct kvm_pic_state)); 2500 sizeof(struct kvm_pic_state));
2501 break; 2501 break;
2502 case KVM_IRQCHIP_PIC_SLAVE: 2502 case KVM_IRQCHIP_PIC_SLAVE:
2503 memcpy(&chip->chip.pic, 2503 memcpy(&chip->chip.pic,
2504 &pic_irqchip(kvm)->pics[1], 2504 &pic_irqchip(kvm)->pics[1],
2505 sizeof(struct kvm_pic_state)); 2505 sizeof(struct kvm_pic_state));
2506 break; 2506 break;
2507 case KVM_IRQCHIP_IOAPIC: 2507 case KVM_IRQCHIP_IOAPIC:
2508 r = kvm_get_ioapic(kvm, &chip->chip.ioapic); 2508 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
2509 break; 2509 break;
2510 default: 2510 default:
2511 r = -EINVAL; 2511 r = -EINVAL;
2512 break; 2512 break;
2513 } 2513 }
2514 return r; 2514 return r;
2515 } 2515 }
2516 2516
2517 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) 2517 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2518 { 2518 {
2519 int r; 2519 int r;
2520 2520
2521 r = 0; 2521 r = 0;
2522 switch (chip->chip_id) { 2522 switch (chip->chip_id) {
2523 case KVM_IRQCHIP_PIC_MASTER: 2523 case KVM_IRQCHIP_PIC_MASTER:
2524 raw_spin_lock(&pic_irqchip(kvm)->lock); 2524 raw_spin_lock(&pic_irqchip(kvm)->lock);
2525 memcpy(&pic_irqchip(kvm)->pics[0], 2525 memcpy(&pic_irqchip(kvm)->pics[0],
2526 &chip->chip.pic, 2526 &chip->chip.pic,
2527 sizeof(struct kvm_pic_state)); 2527 sizeof(struct kvm_pic_state));
2528 raw_spin_unlock(&pic_irqchip(kvm)->lock); 2528 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2529 break; 2529 break;
2530 case KVM_IRQCHIP_PIC_SLAVE: 2530 case KVM_IRQCHIP_PIC_SLAVE:
2531 raw_spin_lock(&pic_irqchip(kvm)->lock); 2531 raw_spin_lock(&pic_irqchip(kvm)->lock);
2532 memcpy(&pic_irqchip(kvm)->pics[1], 2532 memcpy(&pic_irqchip(kvm)->pics[1],
2533 &chip->chip.pic, 2533 &chip->chip.pic,
2534 sizeof(struct kvm_pic_state)); 2534 sizeof(struct kvm_pic_state));
2535 raw_spin_unlock(&pic_irqchip(kvm)->lock); 2535 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2536 break; 2536 break;
2537 case KVM_IRQCHIP_IOAPIC: 2537 case KVM_IRQCHIP_IOAPIC:
2538 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 2538 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
2539 break; 2539 break;
2540 default: 2540 default:
2541 r = -EINVAL; 2541 r = -EINVAL;
2542 break; 2542 break;
2543 } 2543 }
2544 kvm_pic_update_irq(pic_irqchip(kvm)); 2544 kvm_pic_update_irq(pic_irqchip(kvm));
2545 return r; 2545 return r;
2546 } 2546 }
2547 2547
2548 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) 2548 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
2549 { 2549 {
2550 int r = 0; 2550 int r = 0;
2551 2551
2552 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2552 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2553 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); 2553 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
2554 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2554 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2555 return r; 2555 return r;
2556 } 2556 }
2557 2557
2558 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) 2558 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
2559 { 2559 {
2560 int r = 0; 2560 int r = 0;
2561 2561
2562 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2562 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2563 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); 2563 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
2564 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); 2564 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
2565 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2565 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2566 return r; 2566 return r;
2567 } 2567 }
2568 2568
2569 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) 2569 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
2570 { 2570 {
2571 int r = 0; 2571 int r = 0;
2572 2572
2573 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2573 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2574 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, 2574 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
2575 sizeof(ps->channels)); 2575 sizeof(ps->channels));
2576 ps->flags = kvm->arch.vpit->pit_state.flags; 2576 ps->flags = kvm->arch.vpit->pit_state.flags;
2577 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2577 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2578 return r; 2578 return r;
2579 } 2579 }
2580 2580
2581 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) 2581 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
2582 { 2582 {
2583 int r = 0, start = 0; 2583 int r = 0, start = 0;
2584 u32 prev_legacy, cur_legacy; 2584 u32 prev_legacy, cur_legacy;
2585 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2585 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2586 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; 2586 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
2587 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY; 2587 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
2588 if (!prev_legacy && cur_legacy) 2588 if (!prev_legacy && cur_legacy)
2589 start = 1; 2589 start = 1;
2590 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels, 2590 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
2591 sizeof(kvm->arch.vpit->pit_state.channels)); 2591 sizeof(kvm->arch.vpit->pit_state.channels));
2592 kvm->arch.vpit->pit_state.flags = ps->flags; 2592 kvm->arch.vpit->pit_state.flags = ps->flags;
2593 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); 2593 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
2594 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2594 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2595 return r; 2595 return r;
2596 } 2596 }
2597 2597
2598 static int kvm_vm_ioctl_reinject(struct kvm *kvm, 2598 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2599 struct kvm_reinject_control *control) 2599 struct kvm_reinject_control *control)
2600 { 2600 {
2601 if (!kvm->arch.vpit) 2601 if (!kvm->arch.vpit)
2602 return -ENXIO; 2602 return -ENXIO;
2603 mutex_lock(&kvm->arch.vpit->pit_state.lock); 2603 mutex_lock(&kvm->arch.vpit->pit_state.lock);
2604 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; 2604 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
2605 mutex_unlock(&kvm->arch.vpit->pit_state.lock); 2605 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
2606 return 0; 2606 return 0;
2607 } 2607 }
2608 2608
2609 /* 2609 /*
2610 * Get (and clear) the dirty memory log for a memory slot. 2610 * Get (and clear) the dirty memory log for a memory slot.
2611 */ 2611 */
2612 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2612 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2613 struct kvm_dirty_log *log) 2613 struct kvm_dirty_log *log)
2614 { 2614 {
2615 int r, n, i; 2615 int r, i;
2616 struct kvm_memory_slot *memslot; 2616 struct kvm_memory_slot *memslot;
2617 unsigned long n;
2617 unsigned long is_dirty = 0; 2618 unsigned long is_dirty = 0;
2618 unsigned long *dirty_bitmap = NULL; 2619 unsigned long *dirty_bitmap = NULL;
2619 2620
2620 mutex_lock(&kvm->slots_lock); 2621 mutex_lock(&kvm->slots_lock);
2621 2622
2622 r = -EINVAL; 2623 r = -EINVAL;
2623 if (log->slot >= KVM_MEMORY_SLOTS) 2624 if (log->slot >= KVM_MEMORY_SLOTS)
2624 goto out; 2625 goto out;
2625 2626
2626 memslot = &kvm->memslots->memslots[log->slot]; 2627 memslot = &kvm->memslots->memslots[log->slot];
2627 r = -ENOENT; 2628 r = -ENOENT;
2628 if (!memslot->dirty_bitmap) 2629 if (!memslot->dirty_bitmap)
2629 goto out; 2630 goto out;
2630 2631
2631 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2632 n = kvm_dirty_bitmap_bytes(memslot);
2632 2633
2633 r = -ENOMEM; 2634 r = -ENOMEM;
2634 dirty_bitmap = vmalloc(n); 2635 dirty_bitmap = vmalloc(n);
2635 if (!dirty_bitmap) 2636 if (!dirty_bitmap)
2636 goto out; 2637 goto out;
2637 memset(dirty_bitmap, 0, n); 2638 memset(dirty_bitmap, 0, n);
2638 2639
2639 for (i = 0; !is_dirty && i < n/sizeof(long); i++) 2640 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2640 is_dirty = memslot->dirty_bitmap[i]; 2641 is_dirty = memslot->dirty_bitmap[i];
2641 2642
2642 /* If nothing is dirty, don't bother messing with page tables. */ 2643 /* If nothing is dirty, don't bother messing with page tables. */
2643 if (is_dirty) { 2644 if (is_dirty) {
2644 struct kvm_memslots *slots, *old_slots; 2645 struct kvm_memslots *slots, *old_slots;
2645 2646
2646 spin_lock(&kvm->mmu_lock); 2647 spin_lock(&kvm->mmu_lock);
2647 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2648 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2648 spin_unlock(&kvm->mmu_lock); 2649 spin_unlock(&kvm->mmu_lock);
2649 2650
2650 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 2651 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2651 if (!slots) 2652 if (!slots)
2652 goto out_free; 2653 goto out_free;
2653 2654
2654 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 2655 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2655 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; 2656 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2656 2657
2657 old_slots = kvm->memslots; 2658 old_slots = kvm->memslots;
2658 rcu_assign_pointer(kvm->memslots, slots); 2659 rcu_assign_pointer(kvm->memslots, slots);
2659 synchronize_srcu_expedited(&kvm->srcu); 2660 synchronize_srcu_expedited(&kvm->srcu);
2660 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; 2661 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2661 kfree(old_slots); 2662 kfree(old_slots);
2662 } 2663 }
2663 2664
2664 r = 0; 2665 r = 0;
2665 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) 2666 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
2666 r = -EFAULT; 2667 r = -EFAULT;
2667 out_free: 2668 out_free:
2668 vfree(dirty_bitmap); 2669 vfree(dirty_bitmap);
2669 out: 2670 out:
2670 mutex_unlock(&kvm->slots_lock); 2671 mutex_unlock(&kvm->slots_lock);
2671 return r; 2672 return r;
2672 } 2673 }
2673 2674
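The kvm_dirty_bitmap_bytes() call above replaces the open-coded ALIGN(memslot->npages, BITS_PER_LONG) / 8, and n is now an unsigned long rather than an int, so the byte count of a large slot's dirty bitmap can no longer overflow. The wrapper itself is introduced elsewhere in this patch, in the common KVM code rather than in this file; a minimal sketch of such a helper, assuming it simply wraps the old expression with a long-sized return type:

	static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
	{
		/* one bit per guest page, rounded up to whole longs, expressed in bytes */
		return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
	}
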
2674 long kvm_arch_vm_ioctl(struct file *filp, 2675 long kvm_arch_vm_ioctl(struct file *filp,
2675 unsigned int ioctl, unsigned long arg) 2676 unsigned int ioctl, unsigned long arg)
2676 { 2677 {
2677 struct kvm *kvm = filp->private_data; 2678 struct kvm *kvm = filp->private_data;
2678 void __user *argp = (void __user *)arg; 2679 void __user *argp = (void __user *)arg;
2679 int r = -ENOTTY; 2680 int r = -ENOTTY;
2680 /* 2681 /*
2681 * This union makes it completely explicit to gcc-3.x 2682 * This union makes it completely explicit to gcc-3.x
2682 * that these two variables' stack usage should be 2683 * that these two variables' stack usage should be
2683 * combined, not added together. 2684 * combined, not added together.
2684 */ 2685 */
2685 union { 2686 union {
2686 struct kvm_pit_state ps; 2687 struct kvm_pit_state ps;
2687 struct kvm_pit_state2 ps2; 2688 struct kvm_pit_state2 ps2;
2688 struct kvm_memory_alias alias; 2689 struct kvm_memory_alias alias;
2689 struct kvm_pit_config pit_config; 2690 struct kvm_pit_config pit_config;
2690 } u; 2691 } u;
2691 2692
2692 switch (ioctl) { 2693 switch (ioctl) {
2693 case KVM_SET_TSS_ADDR: 2694 case KVM_SET_TSS_ADDR:
2694 r = kvm_vm_ioctl_set_tss_addr(kvm, arg); 2695 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
2695 if (r < 0) 2696 if (r < 0)
2696 goto out; 2697 goto out;
2697 break; 2698 break;
2698 case KVM_SET_IDENTITY_MAP_ADDR: { 2699 case KVM_SET_IDENTITY_MAP_ADDR: {
2699 u64 ident_addr; 2700 u64 ident_addr;
2700 2701
2701 r = -EFAULT; 2702 r = -EFAULT;
2702 if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) 2703 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
2703 goto out; 2704 goto out;
2704 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); 2705 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
2705 if (r < 0) 2706 if (r < 0)
2706 goto out; 2707 goto out;
2707 break; 2708 break;
2708 } 2709 }
2709 case KVM_SET_MEMORY_REGION: { 2710 case KVM_SET_MEMORY_REGION: {
2710 struct kvm_memory_region kvm_mem; 2711 struct kvm_memory_region kvm_mem;
2711 struct kvm_userspace_memory_region kvm_userspace_mem; 2712 struct kvm_userspace_memory_region kvm_userspace_mem;
2712 2713
2713 r = -EFAULT; 2714 r = -EFAULT;
2714 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) 2715 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
2715 goto out; 2716 goto out;
2716 kvm_userspace_mem.slot = kvm_mem.slot; 2717 kvm_userspace_mem.slot = kvm_mem.slot;
2717 kvm_userspace_mem.flags = kvm_mem.flags; 2718 kvm_userspace_mem.flags = kvm_mem.flags;
2718 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; 2719 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
2719 kvm_userspace_mem.memory_size = kvm_mem.memory_size; 2720 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
2720 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); 2721 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
2721 if (r) 2722 if (r)
2722 goto out; 2723 goto out;
2723 break; 2724 break;
2724 } 2725 }
2725 case KVM_SET_NR_MMU_PAGES: 2726 case KVM_SET_NR_MMU_PAGES:
2726 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); 2727 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
2727 if (r) 2728 if (r)
2728 goto out; 2729 goto out;
2729 break; 2730 break;
2730 case KVM_GET_NR_MMU_PAGES: 2731 case KVM_GET_NR_MMU_PAGES:
2731 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); 2732 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
2732 break; 2733 break;
2733 case KVM_SET_MEMORY_ALIAS: 2734 case KVM_SET_MEMORY_ALIAS:
2734 r = -EFAULT; 2735 r = -EFAULT;
2735 if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) 2736 if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
2736 goto out; 2737 goto out;
2737 r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); 2738 r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
2738 if (r) 2739 if (r)
2739 goto out; 2740 goto out;
2740 break; 2741 break;
2741 case KVM_CREATE_IRQCHIP: { 2742 case KVM_CREATE_IRQCHIP: {
2742 struct kvm_pic *vpic; 2743 struct kvm_pic *vpic;
2743 2744
2744 mutex_lock(&kvm->lock); 2745 mutex_lock(&kvm->lock);
2745 r = -EEXIST; 2746 r = -EEXIST;
2746 if (kvm->arch.vpic) 2747 if (kvm->arch.vpic)
2747 goto create_irqchip_unlock; 2748 goto create_irqchip_unlock;
2748 r = -ENOMEM; 2749 r = -ENOMEM;
2749 vpic = kvm_create_pic(kvm); 2750 vpic = kvm_create_pic(kvm);
2750 if (vpic) { 2751 if (vpic) {
2751 r = kvm_ioapic_init(kvm); 2752 r = kvm_ioapic_init(kvm);
2752 if (r) { 2753 if (r) {
2753 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, 2754 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
2754 &vpic->dev); 2755 &vpic->dev);
2755 kfree(vpic); 2756 kfree(vpic);
2756 goto create_irqchip_unlock; 2757 goto create_irqchip_unlock;
2757 } 2758 }
2758 } else 2759 } else
2759 goto create_irqchip_unlock; 2760 goto create_irqchip_unlock;
2760 smp_wmb(); 2761 smp_wmb();
2761 kvm->arch.vpic = vpic; 2762 kvm->arch.vpic = vpic;
2762 smp_wmb(); 2763 smp_wmb();
2763 r = kvm_setup_default_irq_routing(kvm); 2764 r = kvm_setup_default_irq_routing(kvm);
2764 if (r) { 2765 if (r) {
2765 mutex_lock(&kvm->irq_lock); 2766 mutex_lock(&kvm->irq_lock);
2766 kvm_ioapic_destroy(kvm); 2767 kvm_ioapic_destroy(kvm);
2767 kvm_destroy_pic(kvm); 2768 kvm_destroy_pic(kvm);
2768 mutex_unlock(&kvm->irq_lock); 2769 mutex_unlock(&kvm->irq_lock);
2769 } 2770 }
2770 create_irqchip_unlock: 2771 create_irqchip_unlock:
2771 mutex_unlock(&kvm->lock); 2772 mutex_unlock(&kvm->lock);
2772 break; 2773 break;
2773 } 2774 }
2774 case KVM_CREATE_PIT: 2775 case KVM_CREATE_PIT:
2775 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; 2776 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
2776 goto create_pit; 2777 goto create_pit;
2777 case KVM_CREATE_PIT2: 2778 case KVM_CREATE_PIT2:
2778 r = -EFAULT; 2779 r = -EFAULT;
2779 if (copy_from_user(&u.pit_config, argp, 2780 if (copy_from_user(&u.pit_config, argp,
2780 sizeof(struct kvm_pit_config))) 2781 sizeof(struct kvm_pit_config)))
2781 goto out; 2782 goto out;
2782 create_pit: 2783 create_pit:
2783 mutex_lock(&kvm->slots_lock); 2784 mutex_lock(&kvm->slots_lock);
2784 r = -EEXIST; 2785 r = -EEXIST;
2785 if (kvm->arch.vpit) 2786 if (kvm->arch.vpit)
2786 goto create_pit_unlock; 2787 goto create_pit_unlock;
2787 r = -ENOMEM; 2788 r = -ENOMEM;
2788 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); 2789 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
2789 if (kvm->arch.vpit) 2790 if (kvm->arch.vpit)
2790 r = 0; 2791 r = 0;
2791 create_pit_unlock: 2792 create_pit_unlock:
2792 mutex_unlock(&kvm->slots_lock); 2793 mutex_unlock(&kvm->slots_lock);
2793 break; 2794 break;
2794 case KVM_IRQ_LINE_STATUS: 2795 case KVM_IRQ_LINE_STATUS:
2795 case KVM_IRQ_LINE: { 2796 case KVM_IRQ_LINE: {
2796 struct kvm_irq_level irq_event; 2797 struct kvm_irq_level irq_event;
2797 2798
2798 r = -EFAULT; 2799 r = -EFAULT;
2799 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 2800 if (copy_from_user(&irq_event, argp, sizeof irq_event))
2800 goto out; 2801 goto out;
2801 if (irqchip_in_kernel(kvm)) { 2802 if (irqchip_in_kernel(kvm)) {
2802 __s32 status; 2803 __s32 status;
2803 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2804 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
2804 irq_event.irq, irq_event.level); 2805 irq_event.irq, irq_event.level);
2805 if (ioctl == KVM_IRQ_LINE_STATUS) { 2806 if (ioctl == KVM_IRQ_LINE_STATUS) {
2806 irq_event.status = status; 2807 irq_event.status = status;
2807 if (copy_to_user(argp, &irq_event, 2808 if (copy_to_user(argp, &irq_event,
2808 sizeof irq_event)) 2809 sizeof irq_event))
2809 goto out; 2810 goto out;
2810 } 2811 }
2811 r = 0; 2812 r = 0;
2812 } 2813 }
2813 break; 2814 break;
2814 } 2815 }
2815 case KVM_GET_IRQCHIP: { 2816 case KVM_GET_IRQCHIP: {
2816 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 2817 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
2817 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); 2818 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
2818 2819
2819 r = -ENOMEM; 2820 r = -ENOMEM;
2820 if (!chip) 2821 if (!chip)
2821 goto out; 2822 goto out;
2822 r = -EFAULT; 2823 r = -EFAULT;
2823 if (copy_from_user(chip, argp, sizeof *chip)) 2824 if (copy_from_user(chip, argp, sizeof *chip))
2824 goto get_irqchip_out; 2825 goto get_irqchip_out;
2825 r = -ENXIO; 2826 r = -ENXIO;
2826 if (!irqchip_in_kernel(kvm)) 2827 if (!irqchip_in_kernel(kvm))
2827 goto get_irqchip_out; 2828 goto get_irqchip_out;
2828 r = kvm_vm_ioctl_get_irqchip(kvm, chip); 2829 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
2829 if (r) 2830 if (r)
2830 goto get_irqchip_out; 2831 goto get_irqchip_out;
2831 r = -EFAULT; 2832 r = -EFAULT;
2832 if (copy_to_user(argp, chip, sizeof *chip)) 2833 if (copy_to_user(argp, chip, sizeof *chip))
2833 goto get_irqchip_out; 2834 goto get_irqchip_out;
2834 r = 0; 2835 r = 0;
2835 get_irqchip_out: 2836 get_irqchip_out:
2836 kfree(chip); 2837 kfree(chip);
2837 if (r) 2838 if (r)
2838 goto out; 2839 goto out;
2839 break; 2840 break;
2840 } 2841 }
2841 case KVM_SET_IRQCHIP: { 2842 case KVM_SET_IRQCHIP: {
2842 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 2843 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
2843 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); 2844 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
2844 2845
2845 r = -ENOMEM; 2846 r = -ENOMEM;
2846 if (!chip) 2847 if (!chip)
2847 goto out; 2848 goto out;
2848 r = -EFAULT; 2849 r = -EFAULT;
2849 if (copy_from_user(chip, argp, sizeof *chip)) 2850 if (copy_from_user(chip, argp, sizeof *chip))
2850 goto set_irqchip_out; 2851 goto set_irqchip_out;
2851 r = -ENXIO; 2852 r = -ENXIO;
2852 if (!irqchip_in_kernel(kvm)) 2853 if (!irqchip_in_kernel(kvm))
2853 goto set_irqchip_out; 2854 goto set_irqchip_out;
2854 r = kvm_vm_ioctl_set_irqchip(kvm, chip); 2855 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
2855 if (r) 2856 if (r)
2856 goto set_irqchip_out; 2857 goto set_irqchip_out;
2857 r = 0; 2858 r = 0;
2858 set_irqchip_out: 2859 set_irqchip_out:
2859 kfree(chip); 2860 kfree(chip);
2860 if (r) 2861 if (r)
2861 goto out; 2862 goto out;
2862 break; 2863 break;
2863 } 2864 }
2864 case KVM_GET_PIT: { 2865 case KVM_GET_PIT: {
2865 r = -EFAULT; 2866 r = -EFAULT;
2866 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) 2867 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
2867 goto out; 2868 goto out;
2868 r = -ENXIO; 2869 r = -ENXIO;
2869 if (!kvm->arch.vpit) 2870 if (!kvm->arch.vpit)
2870 goto out; 2871 goto out;
2871 r = kvm_vm_ioctl_get_pit(kvm, &u.ps); 2872 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
2872 if (r) 2873 if (r)
2873 goto out; 2874 goto out;
2874 r = -EFAULT; 2875 r = -EFAULT;
2875 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state))) 2876 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
2876 goto out; 2877 goto out;
2877 r = 0; 2878 r = 0;
2878 break; 2879 break;
2879 } 2880 }
2880 case KVM_SET_PIT: { 2881 case KVM_SET_PIT: {
2881 r = -EFAULT; 2882 r = -EFAULT;
2882 if (copy_from_user(&u.ps, argp, sizeof u.ps)) 2883 if (copy_from_user(&u.ps, argp, sizeof u.ps))
2883 goto out; 2884 goto out;
2884 r = -ENXIO; 2885 r = -ENXIO;
2885 if (!kvm->arch.vpit) 2886 if (!kvm->arch.vpit)
2886 goto out; 2887 goto out;
2887 r = kvm_vm_ioctl_set_pit(kvm, &u.ps); 2888 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
2888 if (r) 2889 if (r)
2889 goto out; 2890 goto out;
2890 r = 0; 2891 r = 0;
2891 break; 2892 break;
2892 } 2893 }
2893 case KVM_GET_PIT2: { 2894 case KVM_GET_PIT2: {
2894 r = -ENXIO; 2895 r = -ENXIO;
2895 if (!kvm->arch.vpit) 2896 if (!kvm->arch.vpit)
2896 goto out; 2897 goto out;
2897 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2); 2898 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
2898 if (r) 2899 if (r)
2899 goto out; 2900 goto out;
2900 r = -EFAULT; 2901 r = -EFAULT;
2901 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2))) 2902 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
2902 goto out; 2903 goto out;
2903 r = 0; 2904 r = 0;
2904 break; 2905 break;
2905 } 2906 }
2906 case KVM_SET_PIT2: { 2907 case KVM_SET_PIT2: {
2907 r = -EFAULT; 2908 r = -EFAULT;
2908 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) 2909 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
2909 goto out; 2910 goto out;
2910 r = -ENXIO; 2911 r = -ENXIO;
2911 if (!kvm->arch.vpit) 2912 if (!kvm->arch.vpit)
2912 goto out; 2913 goto out;
2913 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); 2914 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
2914 if (r) 2915 if (r)
2915 goto out; 2916 goto out;
2916 r = 0; 2917 r = 0;
2917 break; 2918 break;
2918 } 2919 }
2919 case KVM_REINJECT_CONTROL: { 2920 case KVM_REINJECT_CONTROL: {
2920 struct kvm_reinject_control control; 2921 struct kvm_reinject_control control;
2921 r = -EFAULT; 2922 r = -EFAULT;
2922 if (copy_from_user(&control, argp, sizeof(control))) 2923 if (copy_from_user(&control, argp, sizeof(control)))
2923 goto out; 2924 goto out;
2924 r = kvm_vm_ioctl_reinject(kvm, &control); 2925 r = kvm_vm_ioctl_reinject(kvm, &control);
2925 if (r) 2926 if (r)
2926 goto out; 2927 goto out;
2927 r = 0; 2928 r = 0;
2928 break; 2929 break;
2929 } 2930 }
2930 case KVM_XEN_HVM_CONFIG: { 2931 case KVM_XEN_HVM_CONFIG: {
2931 r = -EFAULT; 2932 r = -EFAULT;
2932 if (copy_from_user(&kvm->arch.xen_hvm_config, argp, 2933 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
2933 sizeof(struct kvm_xen_hvm_config))) 2934 sizeof(struct kvm_xen_hvm_config)))
2934 goto out; 2935 goto out;
2935 r = -EINVAL; 2936 r = -EINVAL;
2936 if (kvm->arch.xen_hvm_config.flags) 2937 if (kvm->arch.xen_hvm_config.flags)
2937 goto out; 2938 goto out;
2938 r = 0; 2939 r = 0;
2939 break; 2940 break;
2940 } 2941 }
2941 case KVM_SET_CLOCK: { 2942 case KVM_SET_CLOCK: {
2942 struct timespec now; 2943 struct timespec now;
2943 struct kvm_clock_data user_ns; 2944 struct kvm_clock_data user_ns;
2944 u64 now_ns; 2945 u64 now_ns;
2945 s64 delta; 2946 s64 delta;
2946 2947
2947 r = -EFAULT; 2948 r = -EFAULT;
2948 if (copy_from_user(&user_ns, argp, sizeof(user_ns))) 2949 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
2949 goto out; 2950 goto out;
2950 2951
2951 r = -EINVAL; 2952 r = -EINVAL;
2952 if (user_ns.flags) 2953 if (user_ns.flags)
2953 goto out; 2954 goto out;
2954 2955
2955 r = 0; 2956 r = 0;
2956 ktime_get_ts(&now); 2957 ktime_get_ts(&now);
2957 now_ns = timespec_to_ns(&now); 2958 now_ns = timespec_to_ns(&now);
2958 delta = user_ns.clock - now_ns; 2959 delta = user_ns.clock - now_ns;
2959 kvm->arch.kvmclock_offset = delta; 2960 kvm->arch.kvmclock_offset = delta;
2960 break; 2961 break;
2961 } 2962 }
2962 case KVM_GET_CLOCK: { 2963 case KVM_GET_CLOCK: {
2963 struct timespec now; 2964 struct timespec now;
2964 struct kvm_clock_data user_ns; 2965 struct kvm_clock_data user_ns;
2965 u64 now_ns; 2966 u64 now_ns;
2966 2967
2967 ktime_get_ts(&now); 2968 ktime_get_ts(&now);
2968 now_ns = timespec_to_ns(&now); 2969 now_ns = timespec_to_ns(&now);
2969 user_ns.clock = kvm->arch.kvmclock_offset + now_ns; 2970 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
2970 user_ns.flags = 0; 2971 user_ns.flags = 0;
2971 2972
2972 r = -EFAULT; 2973 r = -EFAULT;
2973 if (copy_to_user(argp, &user_ns, sizeof(user_ns))) 2974 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
2974 goto out; 2975 goto out;
2975 r = 0; 2976 r = 0;
2976 break; 2977 break;
2977 } 2978 }
2978 2979
2979 default: 2980 default:
2980 ; 2981 ;
2981 } 2982 }
2982 out: 2983 out:
2983 return r; 2984 return r;
2984 } 2985 }
2985 2986
2986 static void kvm_init_msr_list(void) 2987 static void kvm_init_msr_list(void)
2987 { 2988 {
2988 u32 dummy[2]; 2989 u32 dummy[2];
2989 unsigned i, j; 2990 unsigned i, j;
2990 2991
2991 /* skip the first msrs in the list. KVM-specific */ 2992 /* skip the first msrs in the list. KVM-specific */
2992 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { 2993 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
2993 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) 2994 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
2994 continue; 2995 continue;
2995 if (j < i) 2996 if (j < i)
2996 msrs_to_save[j] = msrs_to_save[i]; 2997 msrs_to_save[j] = msrs_to_save[i];
2997 j++; 2998 j++;
2998 } 2999 }
2999 num_msrs_to_save = j; 3000 num_msrs_to_save = j;
3000 } 3001 }
3001 3002
3002 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, 3003 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3003 const void *v) 3004 const void *v)
3004 { 3005 {
3005 if (vcpu->arch.apic && 3006 if (vcpu->arch.apic &&
3006 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3007 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
3007 return 0; 3008 return 0;
3008 3009
3009 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3010 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3010 } 3011 }
3011 3012
3012 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3013 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3013 { 3014 {
3014 if (vcpu->arch.apic && 3015 if (vcpu->arch.apic &&
3015 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3016 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
3016 return 0; 3017 return 0;
3017 3018
3018 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3019 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3019 } 3020 }
3020 3021
3021 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3022 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3022 { 3023 {
3023 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3024 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3024 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 3025 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3025 } 3026 }
3026 3027
3027 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3028 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3028 { 3029 {
3029 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3030 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3030 access |= PFERR_FETCH_MASK; 3031 access |= PFERR_FETCH_MASK;
3031 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 3032 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3032 } 3033 }
3033 3034
3034 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3035 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3035 { 3036 {
3036 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3037 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3037 access |= PFERR_WRITE_MASK; 3038 access |= PFERR_WRITE_MASK;
3038 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); 3039 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3039 } 3040 }
3040 3041
3041 /* uses this to access any guest's mapped memory without checking CPL */ 3042 /* uses this to access any guest's mapped memory without checking CPL */
3042 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3043 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3043 { 3044 {
3044 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); 3045 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
3045 } 3046 }
3046 3047
3047 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, 3048 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3048 struct kvm_vcpu *vcpu, u32 access, 3049 struct kvm_vcpu *vcpu, u32 access,
3049 u32 *error) 3050 u32 *error)
3050 { 3051 {
3051 void *data = val; 3052 void *data = val;
3052 int r = X86EMUL_CONTINUE; 3053 int r = X86EMUL_CONTINUE;
3053 3054
3054 while (bytes) { 3055 while (bytes) {
3055 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); 3056 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
3056 unsigned offset = addr & (PAGE_SIZE-1); 3057 unsigned offset = addr & (PAGE_SIZE-1);
3057 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3058 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3058 int ret; 3059 int ret;
3059 3060
3060 if (gpa == UNMAPPED_GVA) { 3061 if (gpa == UNMAPPED_GVA) {
3061 r = X86EMUL_PROPAGATE_FAULT; 3062 r = X86EMUL_PROPAGATE_FAULT;
3062 goto out; 3063 goto out;
3063 } 3064 }
3064 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 3065 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3065 if (ret < 0) { 3066 if (ret < 0) {
3066 r = X86EMUL_UNHANDLEABLE; 3067 r = X86EMUL_UNHANDLEABLE;
3067 goto out; 3068 goto out;
3068 } 3069 }
3069 3070
3070 bytes -= toread; 3071 bytes -= toread;
3071 data += toread; 3072 data += toread;
3072 addr += toread; 3073 addr += toread;
3073 } 3074 }
3074 out: 3075 out:
3075 return r; 3076 return r;
3076 } 3077 }
3077 3078
3078 /* used for instruction fetching */ 3079 /* used for instruction fetching */
3079 static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, 3080 static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3080 struct kvm_vcpu *vcpu, u32 *error) 3081 struct kvm_vcpu *vcpu, u32 *error)
3081 { 3082 {
3082 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3083 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3083 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 3084 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3084 access | PFERR_FETCH_MASK, error); 3085 access | PFERR_FETCH_MASK, error);
3085 } 3086 }
3086 3087
3087 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3088 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3088 struct kvm_vcpu *vcpu, u32 *error) 3089 struct kvm_vcpu *vcpu, u32 *error)
3089 { 3090 {
3090 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3091 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3091 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, 3092 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3092 error); 3093 error);
3093 } 3094 }
3094 3095
3095 static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, 3096 static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3096 struct kvm_vcpu *vcpu, u32 *error) 3097 struct kvm_vcpu *vcpu, u32 *error)
3097 { 3098 {
3098 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 3099 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3099 } 3100 }
3100 3101
3101 static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3102 static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
3102 struct kvm_vcpu *vcpu, u32 *error) 3103 struct kvm_vcpu *vcpu, u32 *error)
3103 { 3104 {
3104 void *data = val; 3105 void *data = val;
3105 int r = X86EMUL_CONTINUE; 3106 int r = X86EMUL_CONTINUE;
3106 3107
3107 while (bytes) { 3108 while (bytes) {
3108 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); 3109 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
3109 unsigned offset = addr & (PAGE_SIZE-1); 3110 unsigned offset = addr & (PAGE_SIZE-1);
3110 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3111 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3111 int ret; 3112 int ret;
3112 3113
3113 if (gpa == UNMAPPED_GVA) { 3114 if (gpa == UNMAPPED_GVA) {
3114 r = X86EMUL_PROPAGATE_FAULT; 3115 r = X86EMUL_PROPAGATE_FAULT;
3115 goto out; 3116 goto out;
3116 } 3117 }
3117 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); 3118 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3118 if (ret < 0) { 3119 if (ret < 0) {
3119 r = X86EMUL_UNHANDLEABLE; 3120 r = X86EMUL_UNHANDLEABLE;
3120 goto out; 3121 goto out;
3121 } 3122 }
3122 3123
3123 bytes -= towrite; 3124 bytes -= towrite;
3124 data += towrite; 3125 data += towrite;
3125 addr += towrite; 3126 addr += towrite;
3126 } 3127 }
3127 out: 3128 out:
3128 return r; 3129 return r;
3129 } 3130 }
3130 3131
3131 3132
3132 static int emulator_read_emulated(unsigned long addr, 3133 static int emulator_read_emulated(unsigned long addr,
3133 void *val, 3134 void *val,
3134 unsigned int bytes, 3135 unsigned int bytes,
3135 struct kvm_vcpu *vcpu) 3136 struct kvm_vcpu *vcpu)
3136 { 3137 {
3137 gpa_t gpa; 3138 gpa_t gpa;
3138 u32 error_code; 3139 u32 error_code;
3139 3140
3140 if (vcpu->mmio_read_completed) { 3141 if (vcpu->mmio_read_completed) {
3141 memcpy(val, vcpu->mmio_data, bytes); 3142 memcpy(val, vcpu->mmio_data, bytes);
3142 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, 3143 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
3143 vcpu->mmio_phys_addr, *(u64 *)val); 3144 vcpu->mmio_phys_addr, *(u64 *)val);
3144 vcpu->mmio_read_completed = 0; 3145 vcpu->mmio_read_completed = 0;
3145 return X86EMUL_CONTINUE; 3146 return X86EMUL_CONTINUE;
3146 } 3147 }
3147 3148
3148 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); 3149 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
3149 3150
3150 if (gpa == UNMAPPED_GVA) { 3151 if (gpa == UNMAPPED_GVA) {
3151 kvm_inject_page_fault(vcpu, addr, error_code); 3152 kvm_inject_page_fault(vcpu, addr, error_code);
3152 return X86EMUL_PROPAGATE_FAULT; 3153 return X86EMUL_PROPAGATE_FAULT;
3153 } 3154 }
3154 3155
3155 /* For APIC access vmexit */ 3156 /* For APIC access vmexit */
3156 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3157 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3157 goto mmio; 3158 goto mmio;
3158 3159
3159 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) 3160 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
3160 == X86EMUL_CONTINUE) 3161 == X86EMUL_CONTINUE)
3161 return X86EMUL_CONTINUE; 3162 return X86EMUL_CONTINUE;
3162 3163
3163 mmio: 3164 mmio:
3164 /* 3165 /*
3165 * Is this MMIO handled locally? 3166 * Is this MMIO handled locally?
3166 */ 3167 */
3167 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { 3168 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
3168 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); 3169 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
3169 return X86EMUL_CONTINUE; 3170 return X86EMUL_CONTINUE;
3170 } 3171 }
3171 3172
3172 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); 3173 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3173 3174
3174 vcpu->mmio_needed = 1; 3175 vcpu->mmio_needed = 1;
3175 vcpu->mmio_phys_addr = gpa; 3176 vcpu->mmio_phys_addr = gpa;
3176 vcpu->mmio_size = bytes; 3177 vcpu->mmio_size = bytes;
3177 vcpu->mmio_is_write = 0; 3178 vcpu->mmio_is_write = 0;
3178 3179
3179 return X86EMUL_UNHANDLEABLE; 3180 return X86EMUL_UNHANDLEABLE;
3180 } 3181 }
3181 3182
3182 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, 3183 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
3183 const void *val, int bytes) 3184 const void *val, int bytes)
3184 { 3185 {
3185 int ret; 3186 int ret;
3186 3187
3187 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); 3188 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
3188 if (ret < 0) 3189 if (ret < 0)
3189 return 0; 3190 return 0;
3190 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); 3191 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
3191 return 1; 3192 return 1;
3192 } 3193 }
3193 3194
3194 static int emulator_write_emulated_onepage(unsigned long addr, 3195 static int emulator_write_emulated_onepage(unsigned long addr,
3195 const void *val, 3196 const void *val,
3196 unsigned int bytes, 3197 unsigned int bytes,
3197 struct kvm_vcpu *vcpu) 3198 struct kvm_vcpu *vcpu)
3198 { 3199 {
3199 gpa_t gpa; 3200 gpa_t gpa;
3200 u32 error_code; 3201 u32 error_code;
3201 3202
3202 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); 3203 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
3203 3204
3204 if (gpa == UNMAPPED_GVA) { 3205 if (gpa == UNMAPPED_GVA) {
3205 kvm_inject_page_fault(vcpu, addr, error_code); 3206 kvm_inject_page_fault(vcpu, addr, error_code);
3206 return X86EMUL_PROPAGATE_FAULT; 3207 return X86EMUL_PROPAGATE_FAULT;
3207 } 3208 }
3208 3209
3209 /* For APIC access vmexit */ 3210 /* For APIC access vmexit */
3210 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3211 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3211 goto mmio; 3212 goto mmio;
3212 3213
3213 if (emulator_write_phys(vcpu, gpa, val, bytes)) 3214 if (emulator_write_phys(vcpu, gpa, val, bytes))
3214 return X86EMUL_CONTINUE; 3215 return X86EMUL_CONTINUE;
3215 3216
3216 mmio: 3217 mmio:
3217 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); 3218 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
3218 /* 3219 /*
3219 * Is this MMIO handled locally? 3220 * Is this MMIO handled locally?
3220 */ 3221 */
3221 if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) 3222 if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
3222 return X86EMUL_CONTINUE; 3223 return X86EMUL_CONTINUE;
3223 3224
3224 vcpu->mmio_needed = 1; 3225 vcpu->mmio_needed = 1;
3225 vcpu->mmio_phys_addr = gpa; 3226 vcpu->mmio_phys_addr = gpa;
3226 vcpu->mmio_size = bytes; 3227 vcpu->mmio_size = bytes;
3227 vcpu->mmio_is_write = 1; 3228 vcpu->mmio_is_write = 1;
3228 memcpy(vcpu->mmio_data, val, bytes); 3229 memcpy(vcpu->mmio_data, val, bytes);
3229 3230
3230 return X86EMUL_CONTINUE; 3231 return X86EMUL_CONTINUE;
3231 } 3232 }
3232 3233
3233 int emulator_write_emulated(unsigned long addr, 3234 int emulator_write_emulated(unsigned long addr,
3234 const void *val, 3235 const void *val,
3235 unsigned int bytes, 3236 unsigned int bytes,
3236 struct kvm_vcpu *vcpu) 3237 struct kvm_vcpu *vcpu)
3237 { 3238 {
3238 /* Crossing a page boundary? */ 3239 /* Crossing a page boundary? */
3239 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3240 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3240 int rc, now; 3241 int rc, now;
3241 3242
3242 now = -addr & ~PAGE_MASK; 3243 now = -addr & ~PAGE_MASK;
3243 rc = emulator_write_emulated_onepage(addr, val, now, vcpu); 3244 rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
3244 if (rc != X86EMUL_CONTINUE) 3245 if (rc != X86EMUL_CONTINUE)
3245 return rc; 3246 return rc;
3246 addr += now; 3247 addr += now;
3247 val += now; 3248 val += now;
3248 bytes -= now; 3249 bytes -= now;
3249 } 3250 }
3250 return emulator_write_emulated_onepage(addr, val, bytes, vcpu); 3251 return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
3251 } 3252 }
3252 EXPORT_SYMBOL_GPL(emulator_write_emulated); 3253 EXPORT_SYMBOL_GPL(emulator_write_emulated);
3253 3254
3254 static int emulator_cmpxchg_emulated(unsigned long addr, 3255 static int emulator_cmpxchg_emulated(unsigned long addr,
3255 const void *old, 3256 const void *old,
3256 const void *new, 3257 const void *new,
3257 unsigned int bytes, 3258 unsigned int bytes,
3258 struct kvm_vcpu *vcpu) 3259 struct kvm_vcpu *vcpu)
3259 { 3260 {
3260 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3261 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3261 #ifndef CONFIG_X86_64 3262 #ifndef CONFIG_X86_64
3262 /* guest cmpxchg8b has to be emulated atomically */ 3263 /* guest cmpxchg8b has to be emulated atomically */
3263 if (bytes == 8) { 3264 if (bytes == 8) {
3264 gpa_t gpa; 3265 gpa_t gpa;
3265 struct page *page; 3266 struct page *page;
3266 char *kaddr; 3267 char *kaddr;
3267 u64 val; 3268 u64 val;
3268 3269
3269 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3270 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3270 3271
3271 if (gpa == UNMAPPED_GVA || 3272 if (gpa == UNMAPPED_GVA ||
3272 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3273 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3273 goto emul_write; 3274 goto emul_write;
3274 3275
3275 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3276 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3276 goto emul_write; 3277 goto emul_write;
3277 3278
3278 val = *(u64 *)new; 3279 val = *(u64 *)new;
3279 3280
3280 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3281 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3281 3282
3282 kaddr = kmap_atomic(page, KM_USER0); 3283 kaddr = kmap_atomic(page, KM_USER0);
3283 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 3284 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
3284 kunmap_atomic(kaddr, KM_USER0); 3285 kunmap_atomic(kaddr, KM_USER0);
3285 kvm_release_page_dirty(page); 3286 kvm_release_page_dirty(page);
3286 } 3287 }
3287 emul_write: 3288 emul_write:
3288 #endif 3289 #endif
3289 3290
3290 return emulator_write_emulated(addr, new, bytes, vcpu); 3291 return emulator_write_emulated(addr, new, bytes, vcpu);
3291 } 3292 }
3292 3293
3293 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 3294 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
3294 { 3295 {
3295 return kvm_x86_ops->get_segment_base(vcpu, seg); 3296 return kvm_x86_ops->get_segment_base(vcpu, seg);
3296 } 3297 }
3297 3298
3298 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) 3299 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
3299 { 3300 {
3300 kvm_mmu_invlpg(vcpu, address); 3301 kvm_mmu_invlpg(vcpu, address);
3301 return X86EMUL_CONTINUE; 3302 return X86EMUL_CONTINUE;
3302 } 3303 }
3303 3304
3304 int emulate_clts(struct kvm_vcpu *vcpu) 3305 int emulate_clts(struct kvm_vcpu *vcpu)
3305 { 3306 {
3306 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); 3307 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3307 kvm_x86_ops->fpu_activate(vcpu); 3308 kvm_x86_ops->fpu_activate(vcpu);
3308 return X86EMUL_CONTINUE; 3309 return X86EMUL_CONTINUE;
3309 } 3310 }
3310 3311
3311 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3312 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
3312 { 3313 {
3313 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); 3314 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
3314 } 3315 }
3315 3316
3316 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3317 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
3317 { 3318 {
3318 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3319 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
3319 3320
3320 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); 3321 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
3321 } 3322 }
3322 3323
3323 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3324 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3324 { 3325 {
3325 u8 opcodes[4]; 3326 u8 opcodes[4];
3326 unsigned long rip = kvm_rip_read(vcpu); 3327 unsigned long rip = kvm_rip_read(vcpu);
3327 unsigned long rip_linear; 3328 unsigned long rip_linear;
3328 3329
3329 if (!printk_ratelimit()) 3330 if (!printk_ratelimit())
3330 return; 3331 return;
3331 3332
3332 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3333 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
3333 3334
3334 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); 3335 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
3335 3336
3336 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3337 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
3337 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 3338 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
3338 } 3339 }
3339 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3340 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3340 3341
3341 static struct x86_emulate_ops emulate_ops = { 3342 static struct x86_emulate_ops emulate_ops = {
3342 .read_std = kvm_read_guest_virt_system, 3343 .read_std = kvm_read_guest_virt_system,
3343 .fetch = kvm_fetch_guest_virt, 3344 .fetch = kvm_fetch_guest_virt,
3344 .read_emulated = emulator_read_emulated, 3345 .read_emulated = emulator_read_emulated,
3345 .write_emulated = emulator_write_emulated, 3346 .write_emulated = emulator_write_emulated,
3346 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3347 .cmpxchg_emulated = emulator_cmpxchg_emulated,
3347 }; 3348 };
3348 3349
3349 static void cache_all_regs(struct kvm_vcpu *vcpu) 3350 static void cache_all_regs(struct kvm_vcpu *vcpu)
3350 { 3351 {
3351 kvm_register_read(vcpu, VCPU_REGS_RAX); 3352 kvm_register_read(vcpu, VCPU_REGS_RAX);
3352 kvm_register_read(vcpu, VCPU_REGS_RSP); 3353 kvm_register_read(vcpu, VCPU_REGS_RSP);
3353 kvm_register_read(vcpu, VCPU_REGS_RIP); 3354 kvm_register_read(vcpu, VCPU_REGS_RIP);
3354 vcpu->arch.regs_dirty = ~0; 3355 vcpu->arch.regs_dirty = ~0;
3355 } 3356 }
3356 3357
3357 int emulate_instruction(struct kvm_vcpu *vcpu, 3358 int emulate_instruction(struct kvm_vcpu *vcpu,
3358 unsigned long cr2, 3359 unsigned long cr2,
3359 u16 error_code, 3360 u16 error_code,
3360 int emulation_type) 3361 int emulation_type)
3361 { 3362 {
3362 int r, shadow_mask; 3363 int r, shadow_mask;
3363 struct decode_cache *c; 3364 struct decode_cache *c;
3364 struct kvm_run *run = vcpu->run; 3365 struct kvm_run *run = vcpu->run;
3365 3366
3366 kvm_clear_exception_queue(vcpu); 3367 kvm_clear_exception_queue(vcpu);
3367 vcpu->arch.mmio_fault_cr2 = cr2; 3368 vcpu->arch.mmio_fault_cr2 = cr2;
3368 /* 3369 /*
3369 * TODO: fix emulate.c to use guest_read/write_register 3370 * TODO: fix emulate.c to use guest_read/write_register
3370 * instead of direct ->regs accesses, can save hundreds of cycles 3371 * instead of direct ->regs accesses, can save hundreds of cycles
3371 * on Intel for instructions that don't read/change RSP, 3372 * on Intel for instructions that don't read/change RSP,
3372 * for example. 3373 * for example.
3373 */ 3374 */
3374 cache_all_regs(vcpu); 3375 cache_all_regs(vcpu);
3375 3376
3376 vcpu->mmio_is_write = 0; 3377 vcpu->mmio_is_write = 0;
3377 vcpu->arch.pio.string = 0; 3378 vcpu->arch.pio.string = 0;
3378 3379
3379 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3380 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
3380 int cs_db, cs_l; 3381 int cs_db, cs_l;
3381 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3382 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3382 3383
3383 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3384 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3384 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3385 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
3385 vcpu->arch.emulate_ctxt.mode = 3386 vcpu->arch.emulate_ctxt.mode =
3386 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3387 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3387 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3388 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
3388 ? X86EMUL_MODE_VM86 : cs_l 3389 ? X86EMUL_MODE_VM86 : cs_l
3389 ? X86EMUL_MODE_PROT64 : cs_db 3390 ? X86EMUL_MODE_PROT64 : cs_db
3390 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3391 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3391 3392
3392 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3393 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3393 3394
3394 /* Only allow emulation of specific instructions on #UD 3395 /* Only allow emulation of specific instructions on #UD
3395 * (namely VMMCALL, sysenter, sysexit, syscall) */ 3396 * (namely VMMCALL, sysenter, sysexit, syscall) */
3396 c = &vcpu->arch.emulate_ctxt.decode; 3397 c = &vcpu->arch.emulate_ctxt.decode;
3397 if (emulation_type & EMULTYPE_TRAP_UD) { 3398 if (emulation_type & EMULTYPE_TRAP_UD) {
3398 if (!c->twobyte) 3399 if (!c->twobyte)
3399 return EMULATE_FAIL; 3400 return EMULATE_FAIL;
3400 switch (c->b) { 3401 switch (c->b) {
3401 case 0x01: /* VMMCALL */ 3402 case 0x01: /* VMMCALL */
3402 if (c->modrm_mod != 3 || c->modrm_rm != 1) 3403 if (c->modrm_mod != 3 || c->modrm_rm != 1)
3403 return EMULATE_FAIL; 3404 return EMULATE_FAIL;
3404 break; 3405 break;
3405 case 0x34: /* sysenter */ 3406 case 0x34: /* sysenter */
3406 case 0x35: /* sysexit */ 3407 case 0x35: /* sysexit */
3407 if (c->modrm_mod != 0 || c->modrm_rm != 0) 3408 if (c->modrm_mod != 0 || c->modrm_rm != 0)
3408 return EMULATE_FAIL; 3409 return EMULATE_FAIL;
3409 break; 3410 break;
3410 case 0x05: /* syscall */ 3411 case 0x05: /* syscall */
3411 if (c->modrm_mod != 0 || c->modrm_rm != 0) 3412 if (c->modrm_mod != 0 || c->modrm_rm != 0)
3412 return EMULATE_FAIL; 3413 return EMULATE_FAIL;
3413 break; 3414 break;
3414 default: 3415 default:
3415 return EMULATE_FAIL; 3416 return EMULATE_FAIL;
3416 } 3417 }
3417 3418
3418 if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) 3419 if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
3419 return EMULATE_FAIL; 3420 return EMULATE_FAIL;
3420 } 3421 }
3421 3422
3422 ++vcpu->stat.insn_emulation; 3423 ++vcpu->stat.insn_emulation;
3423 if (r) { 3424 if (r) {
3424 ++vcpu->stat.insn_emulation_fail; 3425 ++vcpu->stat.insn_emulation_fail;
3425 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3426 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3426 return EMULATE_DONE; 3427 return EMULATE_DONE;
3427 return EMULATE_FAIL; 3428 return EMULATE_FAIL;
3428 } 3429 }
3429 } 3430 }
3430 3431
3431 if (emulation_type & EMULTYPE_SKIP) { 3432 if (emulation_type & EMULTYPE_SKIP) {
3432 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); 3433 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
3433 return EMULATE_DONE; 3434 return EMULATE_DONE;
3434 } 3435 }
3435 3436
3436 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3437 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3437 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; 3438 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
3438 3439
3439 if (r == 0) 3440 if (r == 0)
3440 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 3441 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
3441 3442
3442 if (vcpu->arch.pio.string) 3443 if (vcpu->arch.pio.string)
3443 return EMULATE_DO_MMIO; 3444 return EMULATE_DO_MMIO;
3444 3445
3445 if ((r || vcpu->mmio_is_write) && run) { 3446 if ((r || vcpu->mmio_is_write) && run) {
3446 run->exit_reason = KVM_EXIT_MMIO; 3447 run->exit_reason = KVM_EXIT_MMIO;
3447 run->mmio.phys_addr = vcpu->mmio_phys_addr; 3448 run->mmio.phys_addr = vcpu->mmio_phys_addr;
3448 memcpy(run->mmio.data, vcpu->mmio_data, 8); 3449 memcpy(run->mmio.data, vcpu->mmio_data, 8);
3449 run->mmio.len = vcpu->mmio_size; 3450 run->mmio.len = vcpu->mmio_size;
3450 run->mmio.is_write = vcpu->mmio_is_write; 3451 run->mmio.is_write = vcpu->mmio_is_write;
3451 } 3452 }
3452 3453
3453 if (r) { 3454 if (r) {
3454 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3455 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3455 return EMULATE_DONE; 3456 return EMULATE_DONE;
3456 if (!vcpu->mmio_needed) { 3457 if (!vcpu->mmio_needed) {
3457 kvm_report_emulation_failure(vcpu, "mmio"); 3458 kvm_report_emulation_failure(vcpu, "mmio");
3458 return EMULATE_FAIL; 3459 return EMULATE_FAIL;
3459 } 3460 }
3460 return EMULATE_DO_MMIO; 3461 return EMULATE_DO_MMIO;
3461 } 3462 }
3462 3463
3463 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); 3464 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
3464 3465
3465 if (vcpu->mmio_is_write) { 3466 if (vcpu->mmio_is_write) {
3466 vcpu->mmio_needed = 0; 3467 vcpu->mmio_needed = 0;
3467 return EMULATE_DO_MMIO; 3468 return EMULATE_DO_MMIO;
3468 } 3469 }
3469 3470
3470 return EMULATE_DONE; 3471 return EMULATE_DONE;
3471 } 3472 }
3472 EXPORT_SYMBOL_GPL(emulate_instruction); 3473 EXPORT_SYMBOL_GPL(emulate_instruction);
3473 3474
3474 static int pio_copy_data(struct kvm_vcpu *vcpu) 3475 static int pio_copy_data(struct kvm_vcpu *vcpu)
3475 { 3476 {
3476 void *p = vcpu->arch.pio_data; 3477 void *p = vcpu->arch.pio_data;
3477 gva_t q = vcpu->arch.pio.guest_gva; 3478 gva_t q = vcpu->arch.pio.guest_gva;
3478 unsigned bytes; 3479 unsigned bytes;
3479 int ret; 3480 int ret;
3480 u32 error_code; 3481 u32 error_code;
3481 3482
3482 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3483 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3483 if (vcpu->arch.pio.in) 3484 if (vcpu->arch.pio.in)
3484 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); 3485 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3485 else 3486 else
3486 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); 3487 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3487 3488
3488 if (ret == X86EMUL_PROPAGATE_FAULT) 3489 if (ret == X86EMUL_PROPAGATE_FAULT)
3489 kvm_inject_page_fault(vcpu, q, error_code); 3490 kvm_inject_page_fault(vcpu, q, error_code);
3490 3491
3491 return ret; 3492 return ret;
3492 } 3493 }
3493 3494
3494 int complete_pio(struct kvm_vcpu *vcpu) 3495 int complete_pio(struct kvm_vcpu *vcpu)
3495 { 3496 {
3496 struct kvm_pio_request *io = &vcpu->arch.pio; 3497 struct kvm_pio_request *io = &vcpu->arch.pio;
3497 long delta; 3498 long delta;
3498 int r; 3499 int r;
3499 unsigned long val; 3500 unsigned long val;
3500 3501
3501 if (!io->string) { 3502 if (!io->string) {
3502 if (io->in) { 3503 if (io->in) {
3503 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3504 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3504 memcpy(&val, vcpu->arch.pio_data, io->size); 3505 memcpy(&val, vcpu->arch.pio_data, io->size);
3505 kvm_register_write(vcpu, VCPU_REGS_RAX, val); 3506 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
3506 } 3507 }
3507 } else { 3508 } else {
3508 if (io->in) { 3509 if (io->in) {
3509 r = pio_copy_data(vcpu); 3510 r = pio_copy_data(vcpu);
3510 if (r) 3511 if (r)
3511 goto out; 3512 goto out;
3512 } 3513 }
3513 3514
3514 delta = 1; 3515 delta = 1;
3515 if (io->rep) { 3516 if (io->rep) {
3516 delta *= io->cur_count; 3517 delta *= io->cur_count;
3517 /* 3518 /*
3518 * The size of the register should really depend on 3519 * The size of the register should really depend on
3519 * current address size. 3520 * current address size.
3520 */ 3521 */
3521 val = kvm_register_read(vcpu, VCPU_REGS_RCX); 3522 val = kvm_register_read(vcpu, VCPU_REGS_RCX);
3522 val -= delta; 3523 val -= delta;
3523 kvm_register_write(vcpu, VCPU_REGS_RCX, val); 3524 kvm_register_write(vcpu, VCPU_REGS_RCX, val);
3524 } 3525 }
3525 if (io->down) 3526 if (io->down)
3526 delta = -delta; 3527 delta = -delta;
3527 delta *= io->size; 3528 delta *= io->size;
3528 if (io->in) { 3529 if (io->in) {
3529 val = kvm_register_read(vcpu, VCPU_REGS_RDI); 3530 val = kvm_register_read(vcpu, VCPU_REGS_RDI);
3530 val += delta; 3531 val += delta;
3531 kvm_register_write(vcpu, VCPU_REGS_RDI, val); 3532 kvm_register_write(vcpu, VCPU_REGS_RDI, val);
3532 } else { 3533 } else {
3533 val = kvm_register_read(vcpu, VCPU_REGS_RSI); 3534 val = kvm_register_read(vcpu, VCPU_REGS_RSI);
3534 val += delta; 3535 val += delta;
3535 kvm_register_write(vcpu, VCPU_REGS_RSI, val); 3536 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3536 } 3537 }
3537 } 3538 }
3538 out: 3539 out:
3539 io->count -= io->cur_count; 3540 io->count -= io->cur_count;
3540 io->cur_count = 0; 3541 io->cur_count = 0;
3541 3542
3542 return 0; 3543 return 0;
3543 } 3544 }
3544 3545
3545 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) 3546 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3546 { 3547 {
3547 /* TODO: String I/O for in-kernel device */ 3548 /* TODO: String I/O for in-kernel device */
3548 int r; 3549 int r;
3549 3550
3550 if (vcpu->arch.pio.in) 3551 if (vcpu->arch.pio.in)
3551 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, 3552 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3552 vcpu->arch.pio.size, pd); 3553 vcpu->arch.pio.size, pd);
3553 else 3554 else
3554 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 3555 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3555 vcpu->arch.pio.port, vcpu->arch.pio.size, 3556 vcpu->arch.pio.port, vcpu->arch.pio.size,
3556 pd); 3557 pd);
3557 return r; 3558 return r;
3558 } 3559 }
3559 3560
3560 static int pio_string_write(struct kvm_vcpu *vcpu) 3561 static int pio_string_write(struct kvm_vcpu *vcpu)
3561 { 3562 {
3562 struct kvm_pio_request *io = &vcpu->arch.pio; 3563 struct kvm_pio_request *io = &vcpu->arch.pio;
3563 void *pd = vcpu->arch.pio_data; 3564 void *pd = vcpu->arch.pio_data;
3564 int i, r = 0; 3565 int i, r = 0;
3565 3566
3566 for (i = 0; i < io->cur_count; i++) { 3567 for (i = 0; i < io->cur_count; i++) {
3567 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, 3568 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3568 io->port, io->size, pd)) { 3569 io->port, io->size, pd)) {
3569 r = -EOPNOTSUPP; 3570 r = -EOPNOTSUPP;
3570 break; 3571 break;
3571 } 3572 }
3572 pd += io->size; 3573 pd += io->size;
3573 } 3574 }
3574 return r; 3575 return r;
3575 } 3576 }
3576 3577
3577 int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) 3578 int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3578 { 3579 {
3579 unsigned long val; 3580 unsigned long val;
3580 3581
3581 trace_kvm_pio(!in, port, size, 1); 3582 trace_kvm_pio(!in, port, size, 1);
3582 3583
3583 vcpu->run->exit_reason = KVM_EXIT_IO; 3584 vcpu->run->exit_reason = KVM_EXIT_IO;
3584 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3585 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3585 vcpu->run->io.size = vcpu->arch.pio.size = size; 3586 vcpu->run->io.size = vcpu->arch.pio.size = size;
3586 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 3587 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3587 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; 3588 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
3588 vcpu->run->io.port = vcpu->arch.pio.port = port; 3589 vcpu->run->io.port = vcpu->arch.pio.port = port;
3589 vcpu->arch.pio.in = in; 3590 vcpu->arch.pio.in = in;
3590 vcpu->arch.pio.string = 0; 3591 vcpu->arch.pio.string = 0;
3591 vcpu->arch.pio.down = 0; 3592 vcpu->arch.pio.down = 0;
3592 vcpu->arch.pio.rep = 0; 3593 vcpu->arch.pio.rep = 0;
3593 3594
3594 if (!vcpu->arch.pio.in) { 3595 if (!vcpu->arch.pio.in) {
3595 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3596 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3596 memcpy(vcpu->arch.pio_data, &val, 4); 3597 memcpy(vcpu->arch.pio_data, &val, 4);
3597 } 3598 }
3598 3599
3599 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3600 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3600 complete_pio(vcpu); 3601 complete_pio(vcpu);
3601 return 1; 3602 return 1;
3602 } 3603 }
3603 return 0; 3604 return 0;
3604 } 3605 }
3605 EXPORT_SYMBOL_GPL(kvm_emulate_pio); 3606 EXPORT_SYMBOL_GPL(kvm_emulate_pio);
3606 3607
3607 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, 3608 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3608 int size, unsigned long count, int down, 3609 int size, unsigned long count, int down,
3609 gva_t address, int rep, unsigned port) 3610 gva_t address, int rep, unsigned port)
3610 { 3611 {
3611 unsigned now, in_page; 3612 unsigned now, in_page;
3612 int ret = 0; 3613 int ret = 0;
3613 3614
3614 trace_kvm_pio(!in, port, size, count); 3615 trace_kvm_pio(!in, port, size, count);
3615 3616
3616 vcpu->run->exit_reason = KVM_EXIT_IO; 3617 vcpu->run->exit_reason = KVM_EXIT_IO;
3617 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3618 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3618 vcpu->run->io.size = vcpu->arch.pio.size = size; 3619 vcpu->run->io.size = vcpu->arch.pio.size = size;
3619 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 3620 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3620 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; 3621 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
3621 vcpu->run->io.port = vcpu->arch.pio.port = port; 3622 vcpu->run->io.port = vcpu->arch.pio.port = port;
3622 vcpu->arch.pio.in = in; 3623 vcpu->arch.pio.in = in;
3623 vcpu->arch.pio.string = 1; 3624 vcpu->arch.pio.string = 1;
3624 vcpu->arch.pio.down = down; 3625 vcpu->arch.pio.down = down;
3625 vcpu->arch.pio.rep = rep; 3626 vcpu->arch.pio.rep = rep;
3626 3627
3627 if (!count) { 3628 if (!count) {
3628 kvm_x86_ops->skip_emulated_instruction(vcpu); 3629 kvm_x86_ops->skip_emulated_instruction(vcpu);
3629 return 1; 3630 return 1;
3630 } 3631 }
3631 3632
3632 if (!down) 3633 if (!down)
3633 in_page = PAGE_SIZE - offset_in_page(address); 3634 in_page = PAGE_SIZE - offset_in_page(address);
3634 else 3635 else
3635 in_page = offset_in_page(address) + size; 3636 in_page = offset_in_page(address) + size;
3636 now = min(count, (unsigned long)in_page / size); 3637 now = min(count, (unsigned long)in_page / size);
3637 if (!now) 3638 if (!now)
3638 now = 1; 3639 now = 1;
3639 if (down) { 3640 if (down) {
3640 /* 3641 /*
3641 * String I/O in reverse. Yuck. Kill the guest, fix later. 3642 * String I/O in reverse. Yuck. Kill the guest, fix later.
3642 */ 3643 */
3643 pr_unimpl(vcpu, "guest string pio down\n"); 3644 pr_unimpl(vcpu, "guest string pio down\n");
3644 kvm_inject_gp(vcpu, 0); 3645 kvm_inject_gp(vcpu, 0);
3645 return 1; 3646 return 1;
3646 } 3647 }
3647 vcpu->run->io.count = now; 3648 vcpu->run->io.count = now;
3648 vcpu->arch.pio.cur_count = now; 3649 vcpu->arch.pio.cur_count = now;
3649 3650
3650 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) 3651 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
3651 kvm_x86_ops->skip_emulated_instruction(vcpu); 3652 kvm_x86_ops->skip_emulated_instruction(vcpu);
3652 3653
3653 vcpu->arch.pio.guest_gva = address; 3654 vcpu->arch.pio.guest_gva = address;
3654 3655
3655 if (!vcpu->arch.pio.in) { 3656 if (!vcpu->arch.pio.in) {
3656 /* string PIO write */ 3657 /* string PIO write */
3657 ret = pio_copy_data(vcpu); 3658 ret = pio_copy_data(vcpu);
3658 if (ret == X86EMUL_PROPAGATE_FAULT) 3659 if (ret == X86EMUL_PROPAGATE_FAULT)
3659 return 1; 3660 return 1;
3660 if (ret == 0 && !pio_string_write(vcpu)) { 3661 if (ret == 0 && !pio_string_write(vcpu)) {
3661 complete_pio(vcpu); 3662 complete_pio(vcpu);
3662 if (vcpu->arch.pio.count == 0) 3663 if (vcpu->arch.pio.count == 0)
3663 ret = 1; 3664 ret = 1;
3664 } 3665 }
3665 } 3666 }
3666 /* no string PIO read support yet */ 3667 /* no string PIO read support yet */
3667 3668
3668 return ret; 3669 return ret;
3669 } 3670 }
3670 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 3671 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
3671 3672
3672 static void bounce_off(void *info) 3673 static void bounce_off(void *info)
3673 { 3674 {
3674 /* nothing */ 3675 /* nothing */
3675 } 3676 }
3676 3677
3677 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 3678 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
3678 void *data) 3679 void *data)
3679 { 3680 {
3680 struct cpufreq_freqs *freq = data; 3681 struct cpufreq_freqs *freq = data;
3681 struct kvm *kvm; 3682 struct kvm *kvm;
3682 struct kvm_vcpu *vcpu; 3683 struct kvm_vcpu *vcpu;
3683 int i, send_ipi = 0; 3684 int i, send_ipi = 0;
3684 3685
3685 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) 3686 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
3686 return 0; 3687 return 0;
3687 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) 3688 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
3688 return 0; 3689 return 0;
3689 per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; 3690 per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
3690 3691
3691 spin_lock(&kvm_lock); 3692 spin_lock(&kvm_lock);
3692 list_for_each_entry(kvm, &vm_list, vm_list) { 3693 list_for_each_entry(kvm, &vm_list, vm_list) {
3693 kvm_for_each_vcpu(i, vcpu, kvm) { 3694 kvm_for_each_vcpu(i, vcpu, kvm) {
3694 if (vcpu->cpu != freq->cpu) 3695 if (vcpu->cpu != freq->cpu)
3695 continue; 3696 continue;
3696 if (!kvm_request_guest_time_update(vcpu)) 3697 if (!kvm_request_guest_time_update(vcpu))
3697 continue; 3698 continue;
3698 if (vcpu->cpu != smp_processor_id()) 3699 if (vcpu->cpu != smp_processor_id())
3699 send_ipi++; 3700 send_ipi++;
3700 } 3701 }
3701 } 3702 }
3702 spin_unlock(&kvm_lock); 3703 spin_unlock(&kvm_lock);
3703 3704
3704 if (freq->old < freq->new && send_ipi) { 3705 if (freq->old < freq->new && send_ipi) {
3705 /* 3706 /*
3706 * We upscale the frequency. Must make sure the guest 3707 * We upscale the frequency. Must make sure the guest
3707 * doesn't see old kvmclock values while running with 3708 * doesn't see old kvmclock values while running with
3708 * the new frequency, otherwise we risk the guest seeing 3709 * the new frequency, otherwise we risk the guest seeing
3709 * time go backwards. 3710 * time go backwards.
3710 * 3711 *
3711 * In case we update the frequency for another cpu 3712 * In case we update the frequency for another cpu
3712 * (which might be in guest context) send an interrupt 3713 * (which might be in guest context) send an interrupt
3713 * to kick the cpu out of guest context. Next time 3714 * to kick the cpu out of guest context. Next time
3714 * guest context is entered kvmclock will be updated, 3715 * guest context is entered kvmclock will be updated,
3715 * so the guest will not see stale values. 3716 * so the guest will not see stale values.
3716 */ 3717 */
3717 smp_call_function_single(freq->cpu, bounce_off, NULL, 1); 3718 smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
3718 } 3719 }
3719 return 0; 3720 return 0;
3720 } 3721 }
3721 3722
3722 static struct notifier_block kvmclock_cpufreq_notifier_block = { 3723 static struct notifier_block kvmclock_cpufreq_notifier_block = {
3723 .notifier_call = kvmclock_cpufreq_notifier 3724 .notifier_call = kvmclock_cpufreq_notifier
3724 }; 3725 };
3725 3726
3726 static void kvm_timer_init(void) 3727 static void kvm_timer_init(void)
3727 { 3728 {
3728 int cpu; 3729 int cpu;
3729 3730
3730 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { 3731 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
3731 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, 3732 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
3732 CPUFREQ_TRANSITION_NOTIFIER); 3733 CPUFREQ_TRANSITION_NOTIFIER);
3733 for_each_online_cpu(cpu) { 3734 for_each_online_cpu(cpu) {
3734 unsigned long khz = cpufreq_get(cpu); 3735 unsigned long khz = cpufreq_get(cpu);
3735 if (!khz) 3736 if (!khz)
3736 khz = tsc_khz; 3737 khz = tsc_khz;
3737 per_cpu(cpu_tsc_khz, cpu) = khz; 3738 per_cpu(cpu_tsc_khz, cpu) = khz;
3738 } 3739 }
3739 } else { 3740 } else {
3740 for_each_possible_cpu(cpu) 3741 for_each_possible_cpu(cpu)
3741 per_cpu(cpu_tsc_khz, cpu) = tsc_khz; 3742 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
3742 } 3743 }
3743 } 3744 }
3744 3745
3745 int kvm_arch_init(void *opaque) 3746 int kvm_arch_init(void *opaque)
3746 { 3747 {
3747 int r; 3748 int r;
3748 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; 3749 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
3749 3750
3750 if (kvm_x86_ops) { 3751 if (kvm_x86_ops) {
3751 printk(KERN_ERR "kvm: already loaded the other module\n"); 3752 printk(KERN_ERR "kvm: already loaded the other module\n");
3752 r = -EEXIST; 3753 r = -EEXIST;
3753 goto out; 3754 goto out;
3754 } 3755 }
3755 3756
3756 if (!ops->cpu_has_kvm_support()) { 3757 if (!ops->cpu_has_kvm_support()) {
3757 printk(KERN_ERR "kvm: no hardware support\n"); 3758 printk(KERN_ERR "kvm: no hardware support\n");
3758 r = -EOPNOTSUPP; 3759 r = -EOPNOTSUPP;
3759 goto out; 3760 goto out;
3760 } 3761 }
3761 if (ops->disabled_by_bios()) { 3762 if (ops->disabled_by_bios()) {
3762 printk(KERN_ERR "kvm: disabled by bios\n"); 3763 printk(KERN_ERR "kvm: disabled by bios\n");
3763 r = -EOPNOTSUPP; 3764 r = -EOPNOTSUPP;
3764 goto out; 3765 goto out;
3765 } 3766 }
3766 3767
3767 r = kvm_mmu_module_init(); 3768 r = kvm_mmu_module_init();
3768 if (r) 3769 if (r)
3769 goto out; 3770 goto out;
3770 3771
3771 kvm_init_msr_list(); 3772 kvm_init_msr_list();
3772 3773
3773 kvm_x86_ops = ops; 3774 kvm_x86_ops = ops;
3774 kvm_mmu_set_nonpresent_ptes(0ull, 0ull); 3775 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
3775 kvm_mmu_set_base_ptes(PT_PRESENT_MASK); 3776 kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
3776 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 3777 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
3777 PT_DIRTY_MASK, PT64_NX_MASK, 0); 3778 PT_DIRTY_MASK, PT64_NX_MASK, 0);
3778 3779
3779 kvm_timer_init(); 3780 kvm_timer_init();
3780 3781
3781 return 0; 3782 return 0;
3782 3783
3783 out: 3784 out:
3784 return r; 3785 return r;
3785 } 3786 }
3786 3787
3787 void kvm_arch_exit(void) 3788 void kvm_arch_exit(void)
3788 { 3789 {
3789 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 3790 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
3790 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, 3791 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
3791 CPUFREQ_TRANSITION_NOTIFIER); 3792 CPUFREQ_TRANSITION_NOTIFIER);
3792 kvm_x86_ops = NULL; 3793 kvm_x86_ops = NULL;
3793 kvm_mmu_module_exit(); 3794 kvm_mmu_module_exit();
3794 } 3795 }
3795 3796
3796 int kvm_emulate_halt(struct kvm_vcpu *vcpu) 3797 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
3797 { 3798 {
3798 ++vcpu->stat.halt_exits; 3799 ++vcpu->stat.halt_exits;
3799 if (irqchip_in_kernel(vcpu->kvm)) { 3800 if (irqchip_in_kernel(vcpu->kvm)) {
3800 vcpu->arch.mp_state = KVM_MP_STATE_HALTED; 3801 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
3801 return 1; 3802 return 1;
3802 } else { 3803 } else {
3803 vcpu->run->exit_reason = KVM_EXIT_HLT; 3804 vcpu->run->exit_reason = KVM_EXIT_HLT;
3804 return 0; 3805 return 0;
3805 } 3806 }
3806 } 3807 }
3807 EXPORT_SYMBOL_GPL(kvm_emulate_halt); 3808 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
3808 3809
3809 static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, 3810 static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
3810 unsigned long a1) 3811 unsigned long a1)
3811 { 3812 {
3812 if (is_long_mode(vcpu)) 3813 if (is_long_mode(vcpu))
3813 return a0; 3814 return a0;
3814 else 3815 else
3815 return a0 | ((gpa_t)a1 << 32); 3816 return a0 | ((gpa_t)a1 << 32);
3816 } 3817 }
3817 3818
3818 int kvm_hv_hypercall(struct kvm_vcpu *vcpu) 3819 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
3819 { 3820 {
3820 u64 param, ingpa, outgpa, ret; 3821 u64 param, ingpa, outgpa, ret;
3821 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; 3822 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
3822 bool fast, longmode; 3823 bool fast, longmode;
3823 int cs_db, cs_l; 3824 int cs_db, cs_l;
3824 3825
3825 /* 3826 /*
3826 * a hypercall generates #UD from non-zero CPL and real mode 3827 * a hypercall generates #UD from non-zero CPL and real mode
3827 * per the HYPER-V spec 3828 * per the HYPER-V spec
3828 */ 3829 */
3829 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { 3830 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
3830 kvm_queue_exception(vcpu, UD_VECTOR); 3831 kvm_queue_exception(vcpu, UD_VECTOR);
3831 return 0; 3832 return 0;
3832 } 3833 }
3833 3834
3834 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3835 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3835 longmode = is_long_mode(vcpu) && cs_l == 1; 3836 longmode = is_long_mode(vcpu) && cs_l == 1;
3836 3837
3837 if (!longmode) { 3838 if (!longmode) {
3838 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | 3839 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
3839 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); 3840 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
3840 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | 3841 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
3841 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); 3842 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
3842 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | 3843 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
3843 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); 3844 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
3844 } 3845 }
3845 #ifdef CONFIG_X86_64 3846 #ifdef CONFIG_X86_64
3846 else { 3847 else {
3847 param = kvm_register_read(vcpu, VCPU_REGS_RCX); 3848 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
3848 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); 3849 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
3849 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); 3850 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
3850 } 3851 }
3851 #endif 3852 #endif
3852 3853
3853 code = param & 0xffff; 3854 code = param & 0xffff;
3854 fast = (param >> 16) & 0x1; 3855 fast = (param >> 16) & 0x1;
3855 rep_cnt = (param >> 32) & 0xfff; 3856 rep_cnt = (param >> 32) & 0xfff;
3856 rep_idx = (param >> 48) & 0xfff; 3857 rep_idx = (param >> 48) & 0xfff;
3857 3858
3858 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); 3859 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
3859 3860
3860 switch (code) { 3861 switch (code) {
3861 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: 3862 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
3862 kvm_vcpu_on_spin(vcpu); 3863 kvm_vcpu_on_spin(vcpu);
3863 break; 3864 break;
3864 default: 3865 default:
3865 res = HV_STATUS_INVALID_HYPERCALL_CODE; 3866 res = HV_STATUS_INVALID_HYPERCALL_CODE;
3866 break; 3867 break;
3867 } 3868 }
3868 3869
3869 ret = res | (((u64)rep_done & 0xfff) << 32); 3870 ret = res | (((u64)rep_done & 0xfff) << 32);
3870 if (longmode) { 3871 if (longmode) {
3871 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 3872 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
3872 } else { 3873 } else {
3873 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); 3874 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
3874 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); 3875 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
3875 } 3876 }
3876 3877
3877 return 1; 3878 return 1;
3878 } 3879 }
3879 3880
3880 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 3881 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
3881 { 3882 {
3882 unsigned long nr, a0, a1, a2, a3, ret; 3883 unsigned long nr, a0, a1, a2, a3, ret;
3883 int r = 1; 3884 int r = 1;
3884 3885
3885 if (kvm_hv_hypercall_enabled(vcpu->kvm)) 3886 if (kvm_hv_hypercall_enabled(vcpu->kvm))
3886 return kvm_hv_hypercall(vcpu); 3887 return kvm_hv_hypercall(vcpu);
3887 3888
3888 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 3889 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
3889 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); 3890 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
3890 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); 3891 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
3891 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); 3892 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
3892 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); 3893 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
3893 3894
3894 trace_kvm_hypercall(nr, a0, a1, a2, a3); 3895 trace_kvm_hypercall(nr, a0, a1, a2, a3);
3895 3896
3896 if (!is_long_mode(vcpu)) { 3897 if (!is_long_mode(vcpu)) {
3897 nr &= 0xFFFFFFFF; 3898 nr &= 0xFFFFFFFF;
3898 a0 &= 0xFFFFFFFF; 3899 a0 &= 0xFFFFFFFF;
3899 a1 &= 0xFFFFFFFF; 3900 a1 &= 0xFFFFFFFF;
3900 a2 &= 0xFFFFFFFF; 3901 a2 &= 0xFFFFFFFF;
3901 a3 &= 0xFFFFFFFF; 3902 a3 &= 0xFFFFFFFF;
3902 } 3903 }
3903 3904
3904 if (kvm_x86_ops->get_cpl(vcpu) != 0) { 3905 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
3905 ret = -KVM_EPERM; 3906 ret = -KVM_EPERM;
3906 goto out; 3907 goto out;
3907 } 3908 }
3908 3909
3909 switch (nr) { 3910 switch (nr) {
3910 case KVM_HC_VAPIC_POLL_IRQ: 3911 case KVM_HC_VAPIC_POLL_IRQ:
3911 ret = 0; 3912 ret = 0;
3912 break; 3913 break;
3913 case KVM_HC_MMU_OP: 3914 case KVM_HC_MMU_OP:
3914 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret); 3915 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
3915 break; 3916 break;
3916 default: 3917 default:
3917 ret = -KVM_ENOSYS; 3918 ret = -KVM_ENOSYS;
3918 break; 3919 break;
3919 } 3920 }
3920 out: 3921 out:
3921 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 3922 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
3922 ++vcpu->stat.hypercalls; 3923 ++vcpu->stat.hypercalls;
3923 return r; 3924 return r;
3924 } 3925 }
3925 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); 3926 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
3926 3927
3927 int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 3928 int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3928 { 3929 {
3929 char instruction[3]; 3930 char instruction[3];
3930 unsigned long rip = kvm_rip_read(vcpu); 3931 unsigned long rip = kvm_rip_read(vcpu);
3931 3932
3932 /* 3933 /*
3933 * Blow out the MMU so that no other VCPU has an active mapping, 3934 * Blow out the MMU so that no other VCPU has an active mapping,
3934 * to ensure that the updated hypercall appears atomically across all 3935 * to ensure that the updated hypercall appears atomically across all
3935 * VCPUs. 3936 * VCPUs.
3936 */ 3937 */
3937 kvm_mmu_zap_all(vcpu->kvm); 3938 kvm_mmu_zap_all(vcpu->kvm);
3938 3939
3939 kvm_x86_ops->patch_hypercall(vcpu, instruction); 3940 kvm_x86_ops->patch_hypercall(vcpu, instruction);
3940 3941
3941 return emulator_write_emulated(rip, instruction, 3, vcpu); 3942 return emulator_write_emulated(rip, instruction, 3, vcpu);
3942 } 3943 }
3943 3944
3944 static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3945 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
3945 { 3946 {
3946 return (curr_cr & ~((1ULL << 32) - 1)) | new_val; 3947 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
3947 } 3948 }
3948 3949
3949 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 3950 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
3950 { 3951 {
3951 struct descriptor_table dt = { limit, base }; 3952 struct descriptor_table dt = { limit, base };
3952 3953
3953 kvm_x86_ops->set_gdt(vcpu, &dt); 3954 kvm_x86_ops->set_gdt(vcpu, &dt);
3954 } 3955 }
3955 3956
3956 void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 3957 void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
3957 { 3958 {
3958 struct descriptor_table dt = { limit, base }; 3959 struct descriptor_table dt = { limit, base };
3959 3960
3960 kvm_x86_ops->set_idt(vcpu, &dt); 3961 kvm_x86_ops->set_idt(vcpu, &dt);
3961 } 3962 }
3962 3963
3963 void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 3964 void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
3964 unsigned long *rflags) 3965 unsigned long *rflags)
3965 { 3966 {
3966 kvm_lmsw(vcpu, msw); 3967 kvm_lmsw(vcpu, msw);
3967 *rflags = kvm_get_rflags(vcpu); 3968 *rflags = kvm_get_rflags(vcpu);
3968 } 3969 }
3969 3970
3970 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 3971 unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3971 { 3972 {
3972 unsigned long value; 3973 unsigned long value;
3973 3974
3974 switch (cr) { 3975 switch (cr) {
3975 case 0: 3976 case 0:
3976 value = kvm_read_cr0(vcpu); 3977 value = kvm_read_cr0(vcpu);
3977 break; 3978 break;
3978 case 2: 3979 case 2:
3979 value = vcpu->arch.cr2; 3980 value = vcpu->arch.cr2;
3980 break; 3981 break;
3981 case 3: 3982 case 3:
3982 value = vcpu->arch.cr3; 3983 value = vcpu->arch.cr3;
3983 break; 3984 break;
3984 case 4: 3985 case 4:
3985 value = kvm_read_cr4(vcpu); 3986 value = kvm_read_cr4(vcpu);
3986 break; 3987 break;
3987 case 8: 3988 case 8:
3988 value = kvm_get_cr8(vcpu); 3989 value = kvm_get_cr8(vcpu);
3989 break; 3990 break;
3990 default: 3991 default:
3991 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 3992 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3992 return 0; 3993 return 0;
3993 } 3994 }
3994 3995
3995 return value; 3996 return value;
3996 } 3997 }
3997 3998
3998 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, 3999 void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3999 unsigned long *rflags) 4000 unsigned long *rflags)
4000 { 4001 {
4001 switch (cr) { 4002 switch (cr) {
4002 case 0: 4003 case 0:
4003 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); 4004 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4004 *rflags = kvm_get_rflags(vcpu); 4005 *rflags = kvm_get_rflags(vcpu);
4005 break; 4006 break;
4006 case 2: 4007 case 2:
4007 vcpu->arch.cr2 = val; 4008 vcpu->arch.cr2 = val;
4008 break; 4009 break;
4009 case 3: 4010 case 3:
4010 kvm_set_cr3(vcpu, val); 4011 kvm_set_cr3(vcpu, val);
4011 break; 4012 break;
4012 case 4: 4013 case 4:
4013 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); 4014 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4014 break; 4015 break;
4015 case 8: 4016 case 8:
4016 kvm_set_cr8(vcpu, val & 0xfUL); 4017 kvm_set_cr8(vcpu, val & 0xfUL);
4017 break; 4018 break;
4018 default: 4019 default:
4019 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 4020 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4020 } 4021 }
4021 } 4022 }
4022 4023
4023 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) 4024 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
4024 { 4025 {
4025 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; 4026 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
4026 int j, nent = vcpu->arch.cpuid_nent; 4027 int j, nent = vcpu->arch.cpuid_nent;
4027 4028
4028 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; 4029 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
4029 /* when no next entry is found, the current entry[i] is reselected */ 4030 /* when no next entry is found, the current entry[i] is reselected */
4030 for (j = i + 1; ; j = (j + 1) % nent) { 4031 for (j = i + 1; ; j = (j + 1) % nent) {
4031 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; 4032 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
4032 if (ej->function == e->function) { 4033 if (ej->function == e->function) {
4033 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; 4034 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
4034 return j; 4035 return j;
4035 } 4036 }
4036 } 4037 }
4037 return 0; /* silence gcc, even though control never reaches here */ 4038 return 0; /* silence gcc, even though control never reaches here */
4038 } 4039 }
4039 4040
4040 /* find an entry with matching function, matching index (if needed), and that 4041 /* find an entry with matching function, matching index (if needed), and that
4041 * should be read next (if it's stateful) */ 4042 * should be read next (if it's stateful) */
4042 static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, 4043 static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
4043 u32 function, u32 index) 4044 u32 function, u32 index)
4044 { 4045 {
4045 if (e->function != function) 4046 if (e->function != function)
4046 return 0; 4047 return 0;
4047 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) 4048 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
4048 return 0; 4049 return 0;
4049 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && 4050 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
4050 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) 4051 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
4051 return 0; 4052 return 0;
4052 return 1; 4053 return 1;
4053 } 4054 }
4054 4055
4055 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 4056 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
4056 u32 function, u32 index) 4057 u32 function, u32 index)
4057 { 4058 {
4058 int i; 4059 int i;
4059 struct kvm_cpuid_entry2 *best = NULL; 4060 struct kvm_cpuid_entry2 *best = NULL;
4060 4061
4061 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { 4062 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
4062 struct kvm_cpuid_entry2 *e; 4063 struct kvm_cpuid_entry2 *e;
4063 4064
4064 e = &vcpu->arch.cpuid_entries[i]; 4065 e = &vcpu->arch.cpuid_entries[i];
4065 if (is_matching_cpuid_entry(e, function, index)) { 4066 if (is_matching_cpuid_entry(e, function, index)) {
4066 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) 4067 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
4067 move_to_next_stateful_cpuid_entry(vcpu, i); 4068 move_to_next_stateful_cpuid_entry(vcpu, i);
4068 best = e; 4069 best = e;
4069 break; 4070 break;
4070 } 4071 }
4071 /* 4072 /*
4072 * Both basic or both extended? 4073 * Both basic or both extended?
4073 */ 4074 */
4074 if (((e->function ^ function) & 0x80000000) == 0) 4075 if (((e->function ^ function) & 0x80000000) == 0)
4075 if (!best || e->function > best->function) 4076 if (!best || e->function > best->function)
4076 best = e; 4077 best = e;
4077 } 4078 }
4078 return best; 4079 return best;
4079 } 4080 }
4080 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); 4081 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
4081 4082
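/*
 * Guest MAXPHYADDR: CPUID.80000008H:EAX[7:0] reports the physical
 * address width.  When the guest's cpuid entries lack that leaf, fall
 * back to 36 bits, the PAE-era baseline.
 */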
4082 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4083 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
4083 { 4084 {
4084 struct kvm_cpuid_entry2 *best; 4085 struct kvm_cpuid_entry2 *best;
4085 4086
4086 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); 4087 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
4087 if (best) 4088 if (best)
4088 return best->eax & 0xff; 4089 return best->eax & 0xff;
4089 return 36; 4090 return 36;
4090 } 4091 }
4091 4092
4092 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 4093 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
4093 { 4094 {
4094 u32 function, index; 4095 u32 function, index;
4095 struct kvm_cpuid_entry2 *best; 4096 struct kvm_cpuid_entry2 *best;
4096 4097
4097 function = kvm_register_read(vcpu, VCPU_REGS_RAX); 4098 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
4098 index = kvm_register_read(vcpu, VCPU_REGS_RCX); 4099 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
4099 kvm_register_write(vcpu, VCPU_REGS_RAX, 0); 4100 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
4100 kvm_register_write(vcpu, VCPU_REGS_RBX, 0); 4101 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
4101 kvm_register_write(vcpu, VCPU_REGS_RCX, 0); 4102 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
4102 kvm_register_write(vcpu, VCPU_REGS_RDX, 0); 4103 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
4103 best = kvm_find_cpuid_entry(vcpu, function, index); 4104 best = kvm_find_cpuid_entry(vcpu, function, index);
4104 if (best) { 4105 if (best) {
4105 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); 4106 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
4106 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); 4107 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
4107 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); 4108 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
4108 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); 4109 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
4109 } 4110 }
4110 kvm_x86_ops->skip_emulated_instruction(vcpu); 4111 kvm_x86_ops->skip_emulated_instruction(vcpu);
4111 trace_kvm_cpuid(function, 4112 trace_kvm_cpuid(function,
4112 kvm_register_read(vcpu, VCPU_REGS_RAX), 4113 kvm_register_read(vcpu, VCPU_REGS_RAX),
4113 kvm_register_read(vcpu, VCPU_REGS_RBX), 4114 kvm_register_read(vcpu, VCPU_REGS_RBX),
4114 kvm_register_read(vcpu, VCPU_REGS_RCX), 4115 kvm_register_read(vcpu, VCPU_REGS_RCX),
4115 kvm_register_read(vcpu, VCPU_REGS_RDX)); 4116 kvm_register_read(vcpu, VCPU_REGS_RDX));
4116 } 4117 }
4117 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 4118 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
4118 4119
4119 /* 4120 /*
4120 * Check whether userspace requested an interrupt window, and whether 4121 * Check whether userspace requested an interrupt window, and whether
4121 * the interrupt window is open. 4122 * the interrupt window is open.
4122 * 4123 *
4123 * No need to exit to userspace if we already have an interrupt queued. 4124 * No need to exit to userspace if we already have an interrupt queued.
4124 */ 4125 */
4125 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) 4126 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
4126 { 4127 {
4127 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && 4128 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
4128 vcpu->run->request_interrupt_window && 4129 vcpu->run->request_interrupt_window &&
4129 kvm_arch_interrupt_allowed(vcpu)); 4130 kvm_arch_interrupt_allowed(vcpu));
4130 } 4131 }
4131 4132
4132 static void post_kvm_run_save(struct kvm_vcpu *vcpu) 4133 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
4133 { 4134 {
4134 struct kvm_run *kvm_run = vcpu->run; 4135 struct kvm_run *kvm_run = vcpu->run;
4135 4136
4136 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; 4137 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
4137 kvm_run->cr8 = kvm_get_cr8(vcpu); 4138 kvm_run->cr8 = kvm_get_cr8(vcpu);
4138 kvm_run->apic_base = kvm_get_apic_base(vcpu); 4139 kvm_run->apic_base = kvm_get_apic_base(vcpu);
4139 if (irqchip_in_kernel(vcpu->kvm)) 4140 if (irqchip_in_kernel(vcpu->kvm))
4140 kvm_run->ready_for_interrupt_injection = 1; 4141 kvm_run->ready_for_interrupt_injection = 1;
4141 else 4142 else
4142 kvm_run->ready_for_interrupt_injection = 4143 kvm_run->ready_for_interrupt_injection =
4143 kvm_arch_interrupt_allowed(vcpu) && 4144 kvm_arch_interrupt_allowed(vcpu) &&
4144 !kvm_cpu_has_interrupt(vcpu) && 4145 !kvm_cpu_has_interrupt(vcpu) &&
4145 !kvm_event_needs_reinjection(vcpu); 4146 !kvm_event_needs_reinjection(vcpu);
4146 } 4147 }
4147 4148
4148 static void vapic_enter(struct kvm_vcpu *vcpu) 4149 static void vapic_enter(struct kvm_vcpu *vcpu)
4149 { 4150 {
4150 struct kvm_lapic *apic = vcpu->arch.apic; 4151 struct kvm_lapic *apic = vcpu->arch.apic;
4151 struct page *page; 4152 struct page *page;
4152 4153
4153 if (!apic || !apic->vapic_addr) 4154 if (!apic || !apic->vapic_addr)
4154 return; 4155 return;
4155 4156
4156 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4157 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
4157 4158
4158 vcpu->arch.apic->vapic_page = page; 4159 vcpu->arch.apic->vapic_page = page;
4159 } 4160 }
4160 4161
4161 static void vapic_exit(struct kvm_vcpu *vcpu) 4162 static void vapic_exit(struct kvm_vcpu *vcpu)
4162 { 4163 {
4163 struct kvm_lapic *apic = vcpu->arch.apic; 4164 struct kvm_lapic *apic = vcpu->arch.apic;
4164 int idx; 4165 int idx;
4165 4166
4166 if (!apic || !apic->vapic_addr) 4167 if (!apic || !apic->vapic_addr)
4167 return; 4168 return;
4168 4169
4169 idx = srcu_read_lock(&vcpu->kvm->srcu); 4170 idx = srcu_read_lock(&vcpu->kvm->srcu);
4170 kvm_release_page_dirty(apic->vapic_page); 4171 kvm_release_page_dirty(apic->vapic_page);
4171 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4172 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
4172 srcu_read_unlock(&vcpu->kvm->srcu, idx); 4173 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4173 } 4174 }
4174 4175
4175 static void update_cr8_intercept(struct kvm_vcpu *vcpu) 4176 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
4176 { 4177 {
4177 int max_irr, tpr; 4178 int max_irr, tpr;
4178 4179
4179 if (!kvm_x86_ops->update_cr8_intercept) 4180 if (!kvm_x86_ops->update_cr8_intercept)
4180 return; 4181 return;
4181 4182
4182 if (!vcpu->arch.apic) 4183 if (!vcpu->arch.apic)
4183 return; 4184 return;
4184 4185
4185 if (!vcpu->arch.apic->vapic_addr) 4186 if (!vcpu->arch.apic->vapic_addr)
4186 max_irr = kvm_lapic_find_highest_irr(vcpu); 4187 max_irr = kvm_lapic_find_highest_irr(vcpu);
4187 else 4188 else
4188 max_irr = -1; 4189 max_irr = -1;
4189 4190
4190 if (max_irr != -1) 4191 if (max_irr != -1)
4191 max_irr >>= 4; 4192 max_irr >>= 4;
4192 4193
4193 tpr = kvm_lapic_get_cr8(vcpu); 4194 tpr = kvm_lapic_get_cr8(vcpu);
4194 4195
4195 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 4196 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
4196 } 4197 }
4197 4198
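/*
 * Event injection order: a pending exception is (re)queued first,
 * then a previously injected NMI or external interrupt is replayed,
 * and only after that may a new NMI or interrupt be injected, and
 * only if the corresponding window is currently open.
 */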
4198 static void inject_pending_event(struct kvm_vcpu *vcpu) 4199 static void inject_pending_event(struct kvm_vcpu *vcpu)
4199 { 4200 {
4200 /* try to reinject previous events if any */ 4201 /* try to reinject previous events if any */
4201 if (vcpu->arch.exception.pending) { 4202 if (vcpu->arch.exception.pending) {
4202 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 4203 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
4203 vcpu->arch.exception.has_error_code, 4204 vcpu->arch.exception.has_error_code,
4204 vcpu->arch.exception.error_code); 4205 vcpu->arch.exception.error_code);
4205 return; 4206 return;
4206 } 4207 }
4207 4208
4208 if (vcpu->arch.nmi_injected) { 4209 if (vcpu->arch.nmi_injected) {
4209 kvm_x86_ops->set_nmi(vcpu); 4210 kvm_x86_ops->set_nmi(vcpu);
4210 return; 4211 return;
4211 } 4212 }
4212 4213
4213 if (vcpu->arch.interrupt.pending) { 4214 if (vcpu->arch.interrupt.pending) {
4214 kvm_x86_ops->set_irq(vcpu); 4215 kvm_x86_ops->set_irq(vcpu);
4215 return; 4216 return;
4216 } 4217 }
4217 4218
4218 /* try to inject new event if pending */ 4219 /* try to inject new event if pending */
4219 if (vcpu->arch.nmi_pending) { 4220 if (vcpu->arch.nmi_pending) {
4220 if (kvm_x86_ops->nmi_allowed(vcpu)) { 4221 if (kvm_x86_ops->nmi_allowed(vcpu)) {
4221 vcpu->arch.nmi_pending = false; 4222 vcpu->arch.nmi_pending = false;
4222 vcpu->arch.nmi_injected = true; 4223 vcpu->arch.nmi_injected = true;
4223 kvm_x86_ops->set_nmi(vcpu); 4224 kvm_x86_ops->set_nmi(vcpu);
4224 } 4225 }
4225 } else if (kvm_cpu_has_interrupt(vcpu)) { 4226 } else if (kvm_cpu_has_interrupt(vcpu)) {
4226 if (kvm_x86_ops->interrupt_allowed(vcpu)) { 4227 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
4227 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), 4228 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
4228 false); 4229 false);
4229 kvm_x86_ops->set_irq(vcpu); 4230 kvm_x86_ops->set_irq(vcpu);
4230 } 4231 }
4231 } 4232 }
4232 } 4233 }
4233 4234
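/*
 * One guest entry/exit cycle: service vcpu->requests, disable
 * preemption and interrupts, clear KVM_REQ_KICK, bail out if new
 * requests or signals arrived, inject pending events, arm NMI/IRQ
 * window exits, drop the memslot SRCU lock around the actual run,
 * then restore host state and hand the exit reason to handle_exit().
 */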
4234 static int vcpu_enter_guest(struct kvm_vcpu *vcpu) 4235 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
4235 { 4236 {
4236 int r; 4237 int r;
4237 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && 4238 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
4238 vcpu->run->request_interrupt_window; 4239 vcpu->run->request_interrupt_window;
4239 4240
4240 if (vcpu->requests) 4241 if (vcpu->requests)
4241 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 4242 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
4242 kvm_mmu_unload(vcpu); 4243 kvm_mmu_unload(vcpu);
4243 4244
4244 r = kvm_mmu_reload(vcpu); 4245 r = kvm_mmu_reload(vcpu);
4245 if (unlikely(r)) 4246 if (unlikely(r))
4246 goto out; 4247 goto out;
4247 4248
4248 if (vcpu->requests) { 4249 if (vcpu->requests) {
4249 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 4250 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
4250 __kvm_migrate_timers(vcpu); 4251 __kvm_migrate_timers(vcpu);
4251 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) 4252 if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
4252 kvm_write_guest_time(vcpu); 4253 kvm_write_guest_time(vcpu);
4253 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) 4254 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
4254 kvm_mmu_sync_roots(vcpu); 4255 kvm_mmu_sync_roots(vcpu);
4255 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 4256 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
4256 kvm_x86_ops->tlb_flush(vcpu); 4257 kvm_x86_ops->tlb_flush(vcpu);
4257 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 4258 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
4258 &vcpu->requests)) { 4259 &vcpu->requests)) {
4259 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; 4260 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
4260 r = 0; 4261 r = 0;
4261 goto out; 4262 goto out;
4262 } 4263 }
4263 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { 4264 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
4264 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; 4265 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
4265 r = 0; 4266 r = 0;
4266 goto out; 4267 goto out;
4267 } 4268 }
4268 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { 4269 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
4269 vcpu->fpu_active = 0; 4270 vcpu->fpu_active = 0;
4270 kvm_x86_ops->fpu_deactivate(vcpu); 4271 kvm_x86_ops->fpu_deactivate(vcpu);
4271 } 4272 }
4272 } 4273 }
4273 4274
4274 preempt_disable(); 4275 preempt_disable();
4275 4276
4276 kvm_x86_ops->prepare_guest_switch(vcpu); 4277 kvm_x86_ops->prepare_guest_switch(vcpu);
4277 if (vcpu->fpu_active) 4278 if (vcpu->fpu_active)
4278 kvm_load_guest_fpu(vcpu); 4279 kvm_load_guest_fpu(vcpu);
4279 4280
4280 local_irq_disable(); 4281 local_irq_disable();
4281 4282
4282 clear_bit(KVM_REQ_KICK, &vcpu->requests); 4283 clear_bit(KVM_REQ_KICK, &vcpu->requests);
4283 smp_mb__after_clear_bit(); 4284 smp_mb__after_clear_bit();
4284 4285
4285 if (vcpu->requests || need_resched() || signal_pending(current)) { 4286 if (vcpu->requests || need_resched() || signal_pending(current)) {
4286 set_bit(KVM_REQ_KICK, &vcpu->requests); 4287 set_bit(KVM_REQ_KICK, &vcpu->requests);
4287 local_irq_enable(); 4288 local_irq_enable();
4288 preempt_enable(); 4289 preempt_enable();
4289 r = 1; 4290 r = 1;
4290 goto out; 4291 goto out;
4291 } 4292 }
4292 4293
4293 inject_pending_event(vcpu); 4294 inject_pending_event(vcpu);
4294 4295
4295 /* enable NMI/IRQ window open exits if needed */ 4296 /* enable NMI/IRQ window open exits if needed */
4296 if (vcpu->arch.nmi_pending) 4297 if (vcpu->arch.nmi_pending)
4297 kvm_x86_ops->enable_nmi_window(vcpu); 4298 kvm_x86_ops->enable_nmi_window(vcpu);
4298 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) 4299 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
4299 kvm_x86_ops->enable_irq_window(vcpu); 4300 kvm_x86_ops->enable_irq_window(vcpu);
4300 4301
4301 if (kvm_lapic_enabled(vcpu)) { 4302 if (kvm_lapic_enabled(vcpu)) {
4302 update_cr8_intercept(vcpu); 4303 update_cr8_intercept(vcpu);
4303 kvm_lapic_sync_to_vapic(vcpu); 4304 kvm_lapic_sync_to_vapic(vcpu);
4304 } 4305 }
4305 4306
4306 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4307 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4307 4308
4308 kvm_guest_enter(); 4309 kvm_guest_enter();
4309 4310
4310 if (unlikely(vcpu->arch.switch_db_regs)) { 4311 if (unlikely(vcpu->arch.switch_db_regs)) {
4311 set_debugreg(0, 7); 4312 set_debugreg(0, 7);
4312 set_debugreg(vcpu->arch.eff_db[0], 0); 4313 set_debugreg(vcpu->arch.eff_db[0], 0);
4313 set_debugreg(vcpu->arch.eff_db[1], 1); 4314 set_debugreg(vcpu->arch.eff_db[1], 1);
4314 set_debugreg(vcpu->arch.eff_db[2], 2); 4315 set_debugreg(vcpu->arch.eff_db[2], 2);
4315 set_debugreg(vcpu->arch.eff_db[3], 3); 4316 set_debugreg(vcpu->arch.eff_db[3], 3);
4316 } 4317 }
4317 4318
4318 trace_kvm_entry(vcpu->vcpu_id); 4319 trace_kvm_entry(vcpu->vcpu_id);
4319 kvm_x86_ops->run(vcpu); 4320 kvm_x86_ops->run(vcpu);
4320 4321
4321 /* 4322 /*
4322 * If the guest has used debug registers, at least dr7 4323 * If the guest has used debug registers, at least dr7
4323 * will be disabled while returning to the host. 4324 * will be disabled while returning to the host.
4324 * If we don't have active breakpoints in the host, we don't 4325 * If we don't have active breakpoints in the host, we don't
4325 * care about the messed up debug address registers. But if 4326 * care about the messed up debug address registers. But if
4326 * we have some of them active, restore the old state. 4327 * we have some of them active, restore the old state.
4327 */ 4328 */
4328 if (hw_breakpoint_active()) 4329 if (hw_breakpoint_active())
4329 hw_breakpoint_restore(); 4330 hw_breakpoint_restore();
4330 4331
4331 set_bit(KVM_REQ_KICK, &vcpu->requests); 4332 set_bit(KVM_REQ_KICK, &vcpu->requests);
4332 local_irq_enable(); 4333 local_irq_enable();
4333 4334
4334 ++vcpu->stat.exits; 4335 ++vcpu->stat.exits;
4335 4336
4336 /* 4337 /*
4337 * We must have an instruction between local_irq_enable() and 4338 * We must have an instruction between local_irq_enable() and
4338 * kvm_guest_exit(), so the timer interrupt isn't delayed by 4339 * kvm_guest_exit(), so the timer interrupt isn't delayed by
4339 * the interrupt shadow. The stat.exits increment will do nicely. 4340 * the interrupt shadow. The stat.exits increment will do nicely.
4340 * But we need to prevent reordering, hence this barrier(): 4341 * But we need to prevent reordering, hence this barrier():
4341 */ 4342 */
4342 barrier(); 4343 barrier();
4343 4344
4344 kvm_guest_exit(); 4345 kvm_guest_exit();
4345 4346
4346 preempt_enable(); 4347 preempt_enable();
4347 4348
4348 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4349 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4349 4350
4350 /* 4351 /*
4351 * Profile KVM exit RIPs: 4352 * Profile KVM exit RIPs:
4352 */ 4353 */
4353 if (unlikely(prof_on == KVM_PROFILING)) { 4354 if (unlikely(prof_on == KVM_PROFILING)) {
4354 unsigned long rip = kvm_rip_read(vcpu); 4355 unsigned long rip = kvm_rip_read(vcpu);
4355 profile_hit(KVM_PROFILING, (void *)rip); 4356 profile_hit(KVM_PROFILING, (void *)rip);
4356 } 4357 }
4357 4358
4358 4359
4359 kvm_lapic_sync_from_vapic(vcpu); 4360 kvm_lapic_sync_from_vapic(vcpu);
4360 4361
4361 r = kvm_x86_ops->handle_exit(vcpu); 4362 r = kvm_x86_ops->handle_exit(vcpu);
4362 out: 4363 out:
4363 return r; 4364 return r;
4364 } 4365 }
4365 4366
4366 4367
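/*
 * Outer run loop: handle a pending SIPI, then alternate between
 * vcpu_enter_guest() while RUNNABLE and kvm_vcpu_block() otherwise,
 * delivering pending timer interrupts and dropping back to userspace
 * with -EINTR on signals or when an interrupt window was requested.
 */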
4367 static int __vcpu_run(struct kvm_vcpu *vcpu) 4368 static int __vcpu_run(struct kvm_vcpu *vcpu)
4368 { 4369 {
4369 int r; 4370 int r;
4370 struct kvm *kvm = vcpu->kvm; 4371 struct kvm *kvm = vcpu->kvm;
4371 4372
4372 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4373 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
4373 pr_debug("vcpu %d received sipi with vector # %x\n", 4374 pr_debug("vcpu %d received sipi with vector # %x\n",
4374 vcpu->vcpu_id, vcpu->arch.sipi_vector); 4375 vcpu->vcpu_id, vcpu->arch.sipi_vector);
4375 kvm_lapic_reset(vcpu); 4376 kvm_lapic_reset(vcpu);
4376 r = kvm_arch_vcpu_reset(vcpu); 4377 r = kvm_arch_vcpu_reset(vcpu);
4377 if (r) 4378 if (r)
4378 return r; 4379 return r;
4379 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4380 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4380 } 4381 }
4381 4382
4382 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4383 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4383 vapic_enter(vcpu); 4384 vapic_enter(vcpu);
4384 4385
4385 r = 1; 4386 r = 1;
4386 while (r > 0) { 4387 while (r > 0) {
4387 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4388 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
4388 r = vcpu_enter_guest(vcpu); 4389 r = vcpu_enter_guest(vcpu);
4389 else { 4390 else {
4390 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4391 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4391 kvm_vcpu_block(vcpu); 4392 kvm_vcpu_block(vcpu);
4392 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4393 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4393 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4394 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
4394 { 4395 {
4395 switch(vcpu->arch.mp_state) { 4396 switch(vcpu->arch.mp_state) {
4396 case KVM_MP_STATE_HALTED: 4397 case KVM_MP_STATE_HALTED:
4397 vcpu->arch.mp_state = 4398 vcpu->arch.mp_state =
4398 KVM_MP_STATE_RUNNABLE; 4399 KVM_MP_STATE_RUNNABLE;
4399 case KVM_MP_STATE_RUNNABLE: 4400 case KVM_MP_STATE_RUNNABLE:
4400 break; 4401 break;
4401 case KVM_MP_STATE_SIPI_RECEIVED: 4402 case KVM_MP_STATE_SIPI_RECEIVED:
4402 default: 4403 default:
4403 r = -EINTR; 4404 r = -EINTR;
4404 break; 4405 break;
4405 } 4406 }
4406 } 4407 }
4407 } 4408 }
4408 4409
4409 if (r <= 0) 4410 if (r <= 0)
4410 break; 4411 break;
4411 4412
4412 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); 4413 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
4413 if (kvm_cpu_has_pending_timer(vcpu)) 4414 if (kvm_cpu_has_pending_timer(vcpu))
4414 kvm_inject_pending_timer_irqs(vcpu); 4415 kvm_inject_pending_timer_irqs(vcpu);
4415 4416
4416 if (dm_request_for_irq_injection(vcpu)) { 4417 if (dm_request_for_irq_injection(vcpu)) {
4417 r = -EINTR; 4418 r = -EINTR;
4418 vcpu->run->exit_reason = KVM_EXIT_INTR; 4419 vcpu->run->exit_reason = KVM_EXIT_INTR;
4419 ++vcpu->stat.request_irq_exits; 4420 ++vcpu->stat.request_irq_exits;
4420 } 4421 }
4421 if (signal_pending(current)) { 4422 if (signal_pending(current)) {
4422 r = -EINTR; 4423 r = -EINTR;
4423 vcpu->run->exit_reason = KVM_EXIT_INTR; 4424 vcpu->run->exit_reason = KVM_EXIT_INTR;
4424 ++vcpu->stat.signal_exits; 4425 ++vcpu->stat.signal_exits;
4425 } 4426 }
4426 if (need_resched()) { 4427 if (need_resched()) {
4427 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4428 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4428 kvm_resched(vcpu); 4429 kvm_resched(vcpu);
4429 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 4430 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4430 } 4431 }
4431 } 4432 }
4432 4433
4433 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4434 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4434 post_kvm_run_save(vcpu); 4435 post_kvm_run_save(vcpu);
4435 4436
4436 vapic_exit(vcpu); 4437 vapic_exit(vcpu);
4437 4438
4438 return r; 4439 return r;
4439 } 4440 }
4440 4441
4441 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 4442 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4442 { 4443 {
4443 int r; 4444 int r;
4444 sigset_t sigsaved; 4445 sigset_t sigsaved;
4445 4446
4446 vcpu_load(vcpu); 4447 vcpu_load(vcpu);
4447 4448
4448 if (vcpu->sigset_active) 4449 if (vcpu->sigset_active)
4449 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 4450 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
4450 4451
4451 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 4452 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
4452 kvm_vcpu_block(vcpu); 4453 kvm_vcpu_block(vcpu);
4453 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 4454 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
4454 r = -EAGAIN; 4455 r = -EAGAIN;
4455 goto out; 4456 goto out;
4456 } 4457 }
4457 4458
4458 /* re-sync apic's tpr */ 4459 /* re-sync apic's tpr */
4459 if (!irqchip_in_kernel(vcpu->kvm)) 4460 if (!irqchip_in_kernel(vcpu->kvm))
4460 kvm_set_cr8(vcpu, kvm_run->cr8); 4461 kvm_set_cr8(vcpu, kvm_run->cr8);
4461 4462
4462 if (vcpu->arch.pio.cur_count) { 4463 if (vcpu->arch.pio.cur_count) {
4463 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4464 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4464 r = complete_pio(vcpu); 4465 r = complete_pio(vcpu);
4465 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4466 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4466 if (r) 4467 if (r)
4467 goto out; 4468 goto out;
4468 } 4469 }
4469 if (vcpu->mmio_needed) { 4470 if (vcpu->mmio_needed) {
4470 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); 4471 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4471 vcpu->mmio_read_completed = 1; 4472 vcpu->mmio_read_completed = 1;
4472 vcpu->mmio_needed = 0; 4473 vcpu->mmio_needed = 0;
4473 4474
4474 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4475 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4475 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4476 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
4476 EMULTYPE_NO_DECODE); 4477 EMULTYPE_NO_DECODE);
4477 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4478 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4478 if (r == EMULATE_DO_MMIO) { 4479 if (r == EMULATE_DO_MMIO) {
4479 /* 4480 /*
4480 * Read-modify-write. Back to userspace. 4481 * Read-modify-write. Back to userspace.
4481 */ 4482 */
4482 r = 0; 4483 r = 0;
4483 goto out; 4484 goto out;
4484 } 4485 }
4485 } 4486 }
4486 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) 4487 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
4487 kvm_register_write(vcpu, VCPU_REGS_RAX, 4488 kvm_register_write(vcpu, VCPU_REGS_RAX,
4488 kvm_run->hypercall.ret); 4489 kvm_run->hypercall.ret);
4489 4490
4490 r = __vcpu_run(vcpu); 4491 r = __vcpu_run(vcpu);
4491 4492
4492 out: 4493 out:
4493 if (vcpu->sigset_active) 4494 if (vcpu->sigset_active)
4494 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4495 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
4495 4496
4496 vcpu_put(vcpu); 4497 vcpu_put(vcpu);
4497 return r; 4498 return r;
4498 } 4499 }
4499 4500
4500 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4501 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4501 { 4502 {
4502 vcpu_load(vcpu); 4503 vcpu_load(vcpu);
4503 4504
4504 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4505 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4505 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4506 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4506 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4507 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4507 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4508 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4508 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); 4509 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
4509 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); 4510 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
4510 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); 4511 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4511 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); 4512 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4512 #ifdef CONFIG_X86_64 4513 #ifdef CONFIG_X86_64
4513 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); 4514 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
4514 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); 4515 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
4515 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); 4516 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
4516 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); 4517 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
4517 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); 4518 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
4518 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); 4519 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
4519 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); 4520 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
4520 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); 4521 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
4521 #endif 4522 #endif
4522 4523
4523 regs->rip = kvm_rip_read(vcpu); 4524 regs->rip = kvm_rip_read(vcpu);
4524 regs->rflags = kvm_get_rflags(vcpu); 4525 regs->rflags = kvm_get_rflags(vcpu);
4525 4526
4526 vcpu_put(vcpu); 4527 vcpu_put(vcpu);
4527 4528
4528 return 0; 4529 return 0;
4529 } 4530 }
4530 4531
4531 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 4532 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4532 { 4533 {
4533 vcpu_load(vcpu); 4534 vcpu_load(vcpu);
4534 4535
4535 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); 4536 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
4536 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); 4537 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
4537 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); 4538 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
4538 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); 4539 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
4539 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); 4540 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
4540 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); 4541 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
4541 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); 4542 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
4542 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); 4543 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
4543 #ifdef CONFIG_X86_64 4544 #ifdef CONFIG_X86_64
4544 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); 4545 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
4545 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); 4546 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
4546 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); 4547 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
4547 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); 4548 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
4548 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); 4549 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
4549 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); 4550 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
4550 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); 4551 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
4551 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); 4552 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
4552 #endif 4553 #endif
4553 4554
4554 kvm_rip_write(vcpu, regs->rip); 4555 kvm_rip_write(vcpu, regs->rip);
4555 kvm_set_rflags(vcpu, regs->rflags); 4556 kvm_set_rflags(vcpu, regs->rflags);
4556 4557
4557 vcpu->arch.exception.pending = false; 4558 vcpu->arch.exception.pending = false;
4558 4559
4559 vcpu_put(vcpu); 4560 vcpu_put(vcpu);
4560 4561
4561 return 0; 4562 return 0;
4562 } 4563 }
4563 4564
4564 void kvm_get_segment(struct kvm_vcpu *vcpu, 4565 void kvm_get_segment(struct kvm_vcpu *vcpu,
4565 struct kvm_segment *var, int seg) 4566 struct kvm_segment *var, int seg)
4566 { 4567 {
4567 kvm_x86_ops->get_segment(vcpu, var, seg); 4568 kvm_x86_ops->get_segment(vcpu, var, seg);
4568 } 4569 }
4569 4570
4570 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 4571 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4571 { 4572 {
4572 struct kvm_segment cs; 4573 struct kvm_segment cs;
4573 4574
4574 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); 4575 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
4575 *db = cs.db; 4576 *db = cs.db;
4576 *l = cs.l; 4577 *l = cs.l;
4577 } 4578 }
4578 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); 4579 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
4579 4580
4580 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4581 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4581 struct kvm_sregs *sregs) 4582 struct kvm_sregs *sregs)
4582 { 4583 {
4583 struct descriptor_table dt; 4584 struct descriptor_table dt;
4584 4585
4585 vcpu_load(vcpu); 4586 vcpu_load(vcpu);
4586 4587
4587 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 4588 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
4588 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 4589 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
4589 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); 4590 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
4590 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 4591 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
4591 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 4592 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
4592 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 4593 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
4593 4594
4594 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 4595 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
4595 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4596 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
4596 4597
4597 kvm_x86_ops->get_idt(vcpu, &dt); 4598 kvm_x86_ops->get_idt(vcpu, &dt);
4598 sregs->idt.limit = dt.limit; 4599 sregs->idt.limit = dt.limit;
4599 sregs->idt.base = dt.base; 4600 sregs->idt.base = dt.base;
4600 kvm_x86_ops->get_gdt(vcpu, &dt); 4601 kvm_x86_ops->get_gdt(vcpu, &dt);
4601 sregs->gdt.limit = dt.limit; 4602 sregs->gdt.limit = dt.limit;
4602 sregs->gdt.base = dt.base; 4603 sregs->gdt.base = dt.base;
4603 4604
4604 sregs->cr0 = kvm_read_cr0(vcpu); 4605 sregs->cr0 = kvm_read_cr0(vcpu);
4605 sregs->cr2 = vcpu->arch.cr2; 4606 sregs->cr2 = vcpu->arch.cr2;
4606 sregs->cr3 = vcpu->arch.cr3; 4607 sregs->cr3 = vcpu->arch.cr3;
4607 sregs->cr4 = kvm_read_cr4(vcpu); 4608 sregs->cr4 = kvm_read_cr4(vcpu);
4608 sregs->cr8 = kvm_get_cr8(vcpu); 4609 sregs->cr8 = kvm_get_cr8(vcpu);
4609 sregs->efer = vcpu->arch.efer; 4610 sregs->efer = vcpu->arch.efer;
4610 sregs->apic_base = kvm_get_apic_base(vcpu); 4611 sregs->apic_base = kvm_get_apic_base(vcpu);
4611 4612
4612 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); 4613 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
4613 4614
4614 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) 4615 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
4615 set_bit(vcpu->arch.interrupt.nr, 4616 set_bit(vcpu->arch.interrupt.nr,
4616 (unsigned long *)sregs->interrupt_bitmap); 4617 (unsigned long *)sregs->interrupt_bitmap);
4617 4618
4618 vcpu_put(vcpu); 4619 vcpu_put(vcpu);
4619 4620
4620 return 0; 4621 return 0;
4621 } 4622 }
4622 4623
4623 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 4624 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4624 struct kvm_mp_state *mp_state) 4625 struct kvm_mp_state *mp_state)
4625 { 4626 {
4626 vcpu_load(vcpu); 4627 vcpu_load(vcpu);
4627 mp_state->mp_state = vcpu->arch.mp_state; 4628 mp_state->mp_state = vcpu->arch.mp_state;
4628 vcpu_put(vcpu); 4629 vcpu_put(vcpu);
4629 return 0; 4630 return 0;
4630 } 4631 }
4631 4632
4632 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 4633 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4633 struct kvm_mp_state *mp_state) 4634 struct kvm_mp_state *mp_state)
4634 { 4635 {
4635 vcpu_load(vcpu); 4636 vcpu_load(vcpu);
4636 vcpu->arch.mp_state = mp_state->mp_state; 4637 vcpu->arch.mp_state = mp_state->mp_state;
4637 vcpu_put(vcpu); 4638 vcpu_put(vcpu);
4638 return 0; 4639 return 0;
4639 } 4640 }
4640 4641
4641 static void kvm_set_segment(struct kvm_vcpu *vcpu, 4642 static void kvm_set_segment(struct kvm_vcpu *vcpu,
4642 struct kvm_segment *var, int seg) 4643 struct kvm_segment *var, int seg)
4643 { 4644 {
4644 kvm_x86_ops->set_segment(vcpu, var, seg); 4645 kvm_x86_ops->set_segment(vcpu, var, seg);
4645 } 4646 }
4646 4647
4647 static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, 4648 static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
4648 struct kvm_segment *kvm_desct) 4649 struct kvm_segment *kvm_desct)
4649 { 4650 {
4650 kvm_desct->base = get_desc_base(seg_desc); 4651 kvm_desct->base = get_desc_base(seg_desc);
4651 kvm_desct->limit = get_desc_limit(seg_desc); 4652 kvm_desct->limit = get_desc_limit(seg_desc);
4652 if (seg_desc->g) { 4653 if (seg_desc->g) {
4653 kvm_desct->limit <<= 12; 4654 kvm_desct->limit <<= 12;
4654 kvm_desct->limit |= 0xfff; 4655 kvm_desct->limit |= 0xfff;
4655 } 4656 }
4656 kvm_desct->selector = selector; 4657 kvm_desct->selector = selector;
4657 kvm_desct->type = seg_desc->type; 4658 kvm_desct->type = seg_desc->type;
4658 kvm_desct->present = seg_desc->p; 4659 kvm_desct->present = seg_desc->p;
4659 kvm_desct->dpl = seg_desc->dpl; 4660 kvm_desct->dpl = seg_desc->dpl;
4660 kvm_desct->db = seg_desc->d; 4661 kvm_desct->db = seg_desc->d;
4661 kvm_desct->s = seg_desc->s; 4662 kvm_desct->s = seg_desc->s;
4662 kvm_desct->l = seg_desc->l; 4663 kvm_desct->l = seg_desc->l;
4663 kvm_desct->g = seg_desc->g; 4664 kvm_desct->g = seg_desc->g;
4664 kvm_desct->avl = seg_desc->avl; 4665 kvm_desct->avl = seg_desc->avl;
4665 if (!selector) 4666 if (!selector)
4666 kvm_desct->unusable = 1; 4667 kvm_desct->unusable = 1;
4667 else 4668 else
4668 kvm_desct->unusable = 0; 4669 kvm_desct->unusable = 0;
4669 kvm_desct->padding = 0; 4670 kvm_desct->padding = 0;
4670 } 4671 }
4671 4672
4672 static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, 4673 static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
4673 u16 selector, 4674 u16 selector,
4674 struct descriptor_table *dtable) 4675 struct descriptor_table *dtable)
4675 { 4676 {
4676 if (selector & 1 << 2) { 4677 if (selector & 1 << 2) {
4677 struct kvm_segment kvm_seg; 4678 struct kvm_segment kvm_seg;
4678 4679
4679 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); 4680 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
4680 4681
4681 if (kvm_seg.unusable) 4682 if (kvm_seg.unusable)
4682 dtable->limit = 0; 4683 dtable->limit = 0;
4683 else 4684 else
4684 dtable->limit = kvm_seg.limit; 4685 dtable->limit = kvm_seg.limit;
4685 dtable->base = kvm_seg.base; 4686 dtable->base = kvm_seg.base;
4686 } 4687 }
4687 else 4688 else
4688 kvm_x86_ops->get_gdt(vcpu, dtable); 4689 kvm_x86_ops->get_gdt(vcpu, dtable);
4689 } 4690 }
4690 4691
4691 /* allowed just for 8-byte segments */ 4692 /* allowed just for 8-byte segments */
4692 static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4693 static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4693 struct desc_struct *seg_desc) 4694 struct desc_struct *seg_desc)
4694 { 4695 {
4695 struct descriptor_table dtable; 4696 struct descriptor_table dtable;
4696 u16 index = selector >> 3; 4697 u16 index = selector >> 3;
4697 int ret; 4698 int ret;
4698 u32 err; 4699 u32 err;
4699 gva_t addr; 4700 gva_t addr;
4700 4701
4701 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4702 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4702 4703
4703 if (dtable.limit < index * 8 + 7) { 4704 if (dtable.limit < index * 8 + 7) {
4704 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4705 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4705 return X86EMUL_PROPAGATE_FAULT; 4706 return X86EMUL_PROPAGATE_FAULT;
4706 } 4707 }
4707 addr = dtable.base + index * 8; 4708 addr = dtable.base + index * 8;
4708 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), 4709 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4709 vcpu, &err); 4710 vcpu, &err);
4710 if (ret == X86EMUL_PROPAGATE_FAULT) 4711 if (ret == X86EMUL_PROPAGATE_FAULT)
4711 kvm_inject_page_fault(vcpu, addr, err); 4712 kvm_inject_page_fault(vcpu, addr, err);
4712 4713
4713 return ret; 4714 return ret;
4714 } 4715 }
4715 4716
4716 /* allowed just for 8-byte segments */ 4717 /* allowed just for 8-byte segments */
4717 static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4718 static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4718 struct desc_struct *seg_desc) 4719 struct desc_struct *seg_desc)
4719 { 4720 {
4720 struct descriptor_table dtable; 4721 struct descriptor_table dtable;
4721 u16 index = selector >> 3; 4722 u16 index = selector >> 3;
4722 4723
4723 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4724 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4724 4725
4725 if (dtable.limit < index * 8 + 7) 4726 if (dtable.limit < index * 8 + 7)
4726 return 1; 4727 return 1;
4727 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); 4728 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4728 } 4729 }
4729 4730
4730 static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, 4731 static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4731 struct desc_struct *seg_desc) 4732 struct desc_struct *seg_desc)
4732 { 4733 {
4733 u32 base_addr = get_desc_base(seg_desc); 4734 u32 base_addr = get_desc_base(seg_desc);
4734 4735
4735 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); 4736 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4736 } 4737 }
4737 4738
4738 static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, 4739 static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4739 struct desc_struct *seg_desc) 4740 struct desc_struct *seg_desc)
4740 { 4741 {
4741 u32 base_addr = get_desc_base(seg_desc); 4742 u32 base_addr = get_desc_base(seg_desc);
4742 4743
4743 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); 4744 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4744 } 4745 }
4745 4746
4746 static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 4747 static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4747 { 4748 {
4748 struct kvm_segment kvm_seg; 4749 struct kvm_segment kvm_seg;
4749 4750
4750 kvm_get_segment(vcpu, &kvm_seg, seg); 4751 kvm_get_segment(vcpu, &kvm_seg, seg);
4751 return kvm_seg.selector; 4752 return kvm_seg.selector;
4752 } 4753 }
4753 4754
4754 static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) 4755 static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4755 { 4756 {
4756 struct kvm_segment segvar = { 4757 struct kvm_segment segvar = {
4757 .base = selector << 4, 4758 .base = selector << 4,
4758 .limit = 0xffff, 4759 .limit = 0xffff,
4759 .selector = selector, 4760 .selector = selector,
4760 .type = 3, 4761 .type = 3,
4761 .present = 1, 4762 .present = 1,
4762 .dpl = 3, 4763 .dpl = 3,
4763 .db = 0, 4764 .db = 0,
4764 .s = 1, 4765 .s = 1,
4765 .l = 0, 4766 .l = 0,
4766 .g = 0, 4767 .g = 0,
4767 .avl = 0, 4768 .avl = 0,
4768 .unusable = 0, 4769 .unusable = 0,
4769 }; 4770 };
4770 kvm_x86_ops->set_segment(vcpu, &segvar, seg); 4771 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4771 return X86EMUL_CONTINUE; 4772 return X86EMUL_CONTINUE;
4772 } 4773 }
4773 4774
4774 static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) 4775 static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4775 { 4776 {
4776 return (seg != VCPU_SREG_LDTR) && 4777 return (seg != VCPU_SREG_LDTR) &&
4777 (seg != VCPU_SREG_TR) && 4778 (seg != VCPU_SREG_TR) &&
4778 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); 4779 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4779 } 4780 }
4780 4781
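/*
 * Protected-mode segment load, following the SDM checks: a null
 * selector is rejected for CS/SS/TR, TR must live in the GDT, the
 * descriptor must be present and of a type/DPL compatible with the
 * target register, and the accessed bit is written back on success.
 */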
4781 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) 4782 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4782 { 4783 {
4783 struct kvm_segment kvm_seg; 4784 struct kvm_segment kvm_seg;
4784 struct desc_struct seg_desc; 4785 struct desc_struct seg_desc;
4785 u8 dpl, rpl, cpl; 4786 u8 dpl, rpl, cpl;
4786 unsigned err_vec = GP_VECTOR; 4787 unsigned err_vec = GP_VECTOR;
4787 u32 err_code = 0; 4788 u32 err_code = 0;
4788 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ 4789 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4789 int ret; 4790 int ret;
4790 4791
4791 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) 4792 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
4792 return kvm_load_realmode_segment(vcpu, selector, seg); 4793 return kvm_load_realmode_segment(vcpu, selector, seg);
4793 4794
4794 /* NULL selector is not valid for TR, CS and SS */ 4795 /* NULL selector is not valid for TR, CS and SS */
4795 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) 4796 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
4796 && null_selector) 4797 && null_selector)
4797 goto exception; 4798 goto exception;
4798 4799
4799 /* TR should be in GDT only */ 4800 /* TR should be in GDT only */
4800 if (seg == VCPU_SREG_TR && (selector & (1 << 2))) 4801 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
4801 goto exception; 4802 goto exception;
4802 4803
4803 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); 4804 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4804 if (ret) 4805 if (ret)
4805 return ret; 4806 return ret;
4806 4807
4807 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); 4808 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4808 4809
4809 if (null_selector) { /* for NULL selector skip all following checks */ 4810 if (null_selector) { /* for NULL selector skip all following checks */
4810 kvm_seg.unusable = 1; 4811 kvm_seg.unusable = 1;
4811 goto load; 4812 goto load;
4812 } 4813 }
4813 4814
4814 err_code = selector & 0xfffc; 4815 err_code = selector & 0xfffc;
4815 err_vec = GP_VECTOR; 4816 err_vec = GP_VECTOR;
4816 4817
4817 /* can't load a system descriptor into a segment selector */ 4818 /* can't load a system descriptor into a segment selector */
4818 if (seg <= VCPU_SREG_GS && !kvm_seg.s) 4819 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4819 goto exception; 4820 goto exception;
4820 4821
4821 if (!kvm_seg.present) { 4822 if (!kvm_seg.present) {
4822 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; 4823 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4823 goto exception; 4824 goto exception;
4824 } 4825 }
4825 4826
4826 rpl = selector & 3; 4827 rpl = selector & 3;
4827 dpl = kvm_seg.dpl; 4828 dpl = kvm_seg.dpl;
4828 cpl = kvm_x86_ops->get_cpl(vcpu); 4829 cpl = kvm_x86_ops->get_cpl(vcpu);
4829 4830
4830 switch (seg) { 4831 switch (seg) {
4831 case VCPU_SREG_SS: 4832 case VCPU_SREG_SS:
4832 /* 4833 /*
4833 * segment is not a writable data segment, or segment 4834 * segment is not a writable data segment, or segment
4834 * selector's RPL != CPL, or segment descriptor's DPL != CPL 4835 * selector's RPL != CPL, or segment descriptor's DPL != CPL
4835 */ 4836 */
4836 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) 4837 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4837 goto exception; 4838 goto exception;
4838 break; 4839 break;
4839 case VCPU_SREG_CS: 4840 case VCPU_SREG_CS:
4840 if (!(kvm_seg.type & 8)) 4841 if (!(kvm_seg.type & 8))
4841 goto exception; 4842 goto exception;
4842 4843
4843 if (kvm_seg.type & 4) { 4844 if (kvm_seg.type & 4) {
4844 /* conforming */ 4845 /* conforming */
4845 if (dpl > cpl) 4846 if (dpl > cpl)
4846 goto exception; 4847 goto exception;
4847 } else { 4848 } else {
4848 /* nonconforming */ 4849 /* nonconforming */
4849 if (rpl > cpl || dpl != cpl) 4850 if (rpl > cpl || dpl != cpl)
4850 goto exception; 4851 goto exception;
4851 } 4852 }
4852 /* CS(RPL) <- CPL */ 4853 /* CS(RPL) <- CPL */
4853 selector = (selector & 0xfffc) | cpl; 4854 selector = (selector & 0xfffc) | cpl;
4854 break; 4855 break;
4855 case VCPU_SREG_TR: 4856 case VCPU_SREG_TR:
4856 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) 4857 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4857 goto exception; 4858 goto exception;
4858 break; 4859 break;
4859 case VCPU_SREG_LDTR: 4860 case VCPU_SREG_LDTR:
4860 if (kvm_seg.s || kvm_seg.type != 2) 4861 if (kvm_seg.s || kvm_seg.type != 2)
4861 goto exception; 4862 goto exception;
4862 break; 4863 break;
4863 default: /* DS, ES, FS, or GS */ 4864 default: /* DS, ES, FS, or GS */
4864 /* 4865 /*
4865 * segment is not a data or readable code segment or 4866 * segment is not a data or readable code segment or
4866 * ((segment is a data or nonconforming code segment) 4867 * ((segment is a data or nonconforming code segment)
4867 * and (both RPL and CPL > DPL)) 4868 * and (both RPL and CPL > DPL))
4868 */ 4869 */
4869 if ((kvm_seg.type & 0xa) == 0x8 || 4870 if ((kvm_seg.type & 0xa) == 0x8 ||
4870 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) 4871 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4871 goto exception; 4872 goto exception;
4872 break; 4873 break;
4873 } 4874 }
4874 4875
4875 if (!kvm_seg.unusable && kvm_seg.s) { 4876 if (!kvm_seg.unusable && kvm_seg.s) {
4876 /* mark segment as accessed */ 4877 /* mark segment as accessed */
4877 kvm_seg.type |= 1; 4878 kvm_seg.type |= 1;
4878 seg_desc.type |= 1; 4879 seg_desc.type |= 1;
4879 save_guest_segment_descriptor(vcpu, selector, &seg_desc); 4880 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4880 } 4881 }
4881 load: 4882 load:
4882 kvm_set_segment(vcpu, &kvm_seg, seg); 4883 kvm_set_segment(vcpu, &kvm_seg, seg);
4883 return X86EMUL_CONTINUE; 4884 return X86EMUL_CONTINUE;
4884 exception: 4885 exception:
4885 kvm_queue_exception_e(vcpu, err_vec, err_code); 4886 kvm_queue_exception_e(vcpu, err_vec, err_code);
4886 return X86EMUL_PROPAGATE_FAULT; 4887 return X86EMUL_PROPAGATE_FAULT;
4887 } 4888 }
4888 4889
4889 static void save_state_to_tss32(struct kvm_vcpu *vcpu, 4890 static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4890 struct tss_segment_32 *tss) 4891 struct tss_segment_32 *tss)
4891 { 4892 {
4892 tss->cr3 = vcpu->arch.cr3; 4893 tss->cr3 = vcpu->arch.cr3;
4893 tss->eip = kvm_rip_read(vcpu); 4894 tss->eip = kvm_rip_read(vcpu);
4894 tss->eflags = kvm_get_rflags(vcpu); 4895 tss->eflags = kvm_get_rflags(vcpu);
4895 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4896 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4896 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4897 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4897 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4898 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4898 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4899 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4899 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); 4900 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4900 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); 4901 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4901 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); 4902 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
4902 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); 4903 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
4903 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); 4904 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
4904 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); 4905 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
4905 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 4906 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4906 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); 4907 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4907 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); 4908 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
4908 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); 4909 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
4909 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4910 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4910 } 4911 }
4911 4912
4912 static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) 4913 static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4913 { 4914 {
4914 struct kvm_segment kvm_seg; 4915 struct kvm_segment kvm_seg;
4915 kvm_get_segment(vcpu, &kvm_seg, seg); 4916 kvm_get_segment(vcpu, &kvm_seg, seg);
4916 kvm_seg.selector = sel; 4917 kvm_seg.selector = sel;
4917 kvm_set_segment(vcpu, &kvm_seg, seg); 4918 kvm_set_segment(vcpu, &kvm_seg, seg);
4918 } 4919 }
4919 4920
4920 static int load_state_from_tss32(struct kvm_vcpu *vcpu, 4921 static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4921 struct tss_segment_32 *tss) 4922 struct tss_segment_32 *tss)
4922 { 4923 {
4923 kvm_set_cr3(vcpu, tss->cr3); 4924 kvm_set_cr3(vcpu, tss->cr3);
4924 4925
4925 kvm_rip_write(vcpu, tss->eip); 4926 kvm_rip_write(vcpu, tss->eip);
4926 kvm_set_rflags(vcpu, tss->eflags | 2); 4927 kvm_set_rflags(vcpu, tss->eflags | 2);
4927 4928
4928 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); 4929 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
4929 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); 4930 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
4930 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); 4931 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
4931 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); 4932 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
4932 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); 4933 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
4933 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); 4934 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
4934 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 4935 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4935 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 4936 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4936 4937
4937 /* 4938 /*
4938 * SDM says that segment selectors are loaded before segment 4939 * SDM says that segment selectors are loaded before segment
4939 * descriptors 4940 * descriptors
4940 */ 4941 */
4941 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); 4942 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4942 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); 4943 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4943 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); 4944 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4944 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); 4945 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
4945 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); 4946 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
4946 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); 4947 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
4947 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); 4948 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
4948 4949
4949 /* 4950 /*
4950 * Now load segment descriptors. If a fault happens at this stage 4951 * Now load segment descriptors. If a fault happens at this stage
4951 * it is handled in the context of the new task 4952 * it is handled in the context of the new task
4952 */ 4953 */
4953 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) 4954 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
4954 return 1; 4955 return 1;
4955 4956
4956 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) 4957 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4957 return 1; 4958 return 1;
4958 4959
4959 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 4960 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4960 return 1; 4961 return 1;
4961 4962
4962 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) 4963 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4963 return 1; 4964 return 1;
4964 4965
4965 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) 4966 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4966 return 1; 4967 return 1;
4967 4968
4968 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) 4969 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
4969 return 1; 4970 return 1;
4970 4971
4971 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) 4972 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
4972 return 1; 4973 return 1;
4973 return 0; 4974 return 0;
4974 } 4975 }
4975 4976
4976 static void save_state_to_tss16(struct kvm_vcpu *vcpu, 4977 static void save_state_to_tss16(struct kvm_vcpu *vcpu,
4977 struct tss_segment_16 *tss) 4978 struct tss_segment_16 *tss)
4978 { 4979 {
4979 tss->ip = kvm_rip_read(vcpu); 4980 tss->ip = kvm_rip_read(vcpu);
4980 tss->flag = kvm_get_rflags(vcpu); 4981 tss->flag = kvm_get_rflags(vcpu);
4981 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); 4982 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4982 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); 4983 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4983 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); 4984 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4984 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); 4985 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4985 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); 4986 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4986 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); 4987 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4987 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); 4988 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
4988 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); 4989 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
4989 4990
4990 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); 4991 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
4991 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); 4992 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
4992 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); 4993 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4993 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); 4994 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4994 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4995 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4995 } 4996 }
4996 4997
4997 static int load_state_from_tss16(struct kvm_vcpu *vcpu, 4998 static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4998 struct tss_segment_16 *tss) 4999 struct tss_segment_16 *tss)
4999 { 5000 {
5000 kvm_rip_write(vcpu, tss->ip); 5001 kvm_rip_write(vcpu, tss->ip);
5001 kvm_set_rflags(vcpu, tss->flag | 2); 5002 kvm_set_rflags(vcpu, tss->flag | 2);
5002 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); 5003 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
5003 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); 5004 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
5004 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); 5005 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
5005 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); 5006 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
5006 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); 5007 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
5007 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); 5008 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
5008 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5009 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
5009 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5010 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
5010 5011
5011 /* 5012 /*
5012 * SDM says that segment selectors are loaded before segment 5013 * SDM says that segment selectors are loaded before segment
5013 * descriptors 5014 * descriptors
5014 */ 5015 */
5015 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); 5016 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5016 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); 5017 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5017 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); 5018 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5018 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); 5019 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5019 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); 5020 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5020 5021
5021 /* 5022 /*
5022 * Now load segment descriptors. If a fault happens at this stage 5023 * Now load segment descriptors. If a fault happens at this stage
5023 * it is handled in the context of the new task 5024 * it is handled in the context of the new task
5024 */ 5025 */
5025 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) 5026 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
5026 return 1; 5027 return 1;
5027 5028
5028 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) 5029 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5029 return 1; 5030 return 1;
5030 5031
5031 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 5032 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
5032 return 1; 5033 return 1;
5033 5034
5034 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) 5035 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5035 return 1; 5036 return 1;
5036 5037
5037 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) 5038 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5038 return 1; 5039 return 1;
5039 return 0; 5040 return 0;
5040 } 5041 }
5041 5042
5042 static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, 5043 static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
5043 u16 old_tss_sel, u32 old_tss_base, 5044 u16 old_tss_sel, u32 old_tss_base,
5044 struct desc_struct *nseg_desc) 5045 struct desc_struct *nseg_desc)
5045 { 5046 {
5046 struct tss_segment_16 tss_segment_16; 5047 struct tss_segment_16 tss_segment_16;
5047 int ret = 0; 5048 int ret = 0;
5048 5049
5049 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, 5050 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5050 sizeof tss_segment_16)) 5051 sizeof tss_segment_16))
5051 goto out; 5052 goto out;
5052 5053
5053 save_state_to_tss16(vcpu, &tss_segment_16); 5054 save_state_to_tss16(vcpu, &tss_segment_16);
5054 5055
5055 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, 5056 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5056 sizeof tss_segment_16)) 5057 sizeof tss_segment_16))
5057 goto out; 5058 goto out;
5058 5059
5059 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), 5060 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5060 &tss_segment_16, sizeof tss_segment_16)) 5061 &tss_segment_16, sizeof tss_segment_16))
5061 goto out; 5062 goto out;
5062 5063
5063 if (old_tss_sel != 0xffff) { 5064 if (old_tss_sel != 0xffff) {
5064 tss_segment_16.prev_task_link = old_tss_sel; 5065 tss_segment_16.prev_task_link = old_tss_sel;
5065 5066
5066 if (kvm_write_guest(vcpu->kvm, 5067 if (kvm_write_guest(vcpu->kvm,
5067 get_tss_base_addr_write(vcpu, nseg_desc), 5068 get_tss_base_addr_write(vcpu, nseg_desc),
5068 &tss_segment_16.prev_task_link, 5069 &tss_segment_16.prev_task_link,
5069 sizeof tss_segment_16.prev_task_link)) 5070 sizeof tss_segment_16.prev_task_link))
5070 goto out; 5071 goto out;
5071 } 5072 }
5072 5073
5073 if (load_state_from_tss16(vcpu, &tss_segment_16)) 5074 if (load_state_from_tss16(vcpu, &tss_segment_16))
5074 goto out; 5075 goto out;
5075 5076
5076 ret = 1; 5077 ret = 1;
5077 out: 5078 out:
5078 return ret; 5079 return ret;
5079 } 5080 }
5080 5081
5081 static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, 5082 static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
5082 u16 old_tss_sel, u32 old_tss_base, 5083 u16 old_tss_sel, u32 old_tss_base,
5083 struct desc_struct *nseg_desc) 5084 struct desc_struct *nseg_desc)
5084 { 5085 {
5085 struct tss_segment_32 tss_segment_32; 5086 struct tss_segment_32 tss_segment_32;
5086 int ret = 0; 5087 int ret = 0;
5087 5088
5088 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, 5089 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5089 sizeof tss_segment_32)) 5090 sizeof tss_segment_32))
5090 goto out; 5091 goto out;
5091 5092
5092 save_state_to_tss32(vcpu, &tss_segment_32); 5093 save_state_to_tss32(vcpu, &tss_segment_32);
5093 5094
5094 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, 5095 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5095 sizeof tss_segment_32)) 5096 sizeof tss_segment_32))
5096 goto out; 5097 goto out;
5097 5098
5098 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), 5099 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5099 &tss_segment_32, sizeof tss_segment_32)) 5100 &tss_segment_32, sizeof tss_segment_32))
5100 goto out; 5101 goto out;
5101 5102
5102 if (old_tss_sel != 0xffff) { 5103 if (old_tss_sel != 0xffff) {
5103 tss_segment_32.prev_task_link = old_tss_sel; 5104 tss_segment_32.prev_task_link = old_tss_sel;
5104 5105
5105 if (kvm_write_guest(vcpu->kvm, 5106 if (kvm_write_guest(vcpu->kvm,
5106 get_tss_base_addr_write(vcpu, nseg_desc), 5107 get_tss_base_addr_write(vcpu, nseg_desc),
5107 &tss_segment_32.prev_task_link, 5108 &tss_segment_32.prev_task_link,
5108 sizeof tss_segment_32.prev_task_link)) 5109 sizeof tss_segment_32.prev_task_link))
5109 goto out; 5110 goto out;
5110 } 5111 }
5111 5112
5112 if (load_state_from_tss32(vcpu, &tss_segment_32)) 5113 if (load_state_from_tss32(vcpu, &tss_segment_32))
5113 goto out; 5114 goto out;
5114 5115
5115 ret = 1; 5116 ret = 1;
5116 out: 5117 out:
5117 return ret; 5118 return ret;
5118 } 5119 }
5119 5120
5120 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) 5121 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5121 { 5122 {
5122 struct kvm_segment tr_seg; 5123 struct kvm_segment tr_seg;
5123 struct desc_struct cseg_desc; 5124 struct desc_struct cseg_desc;
5124 struct desc_struct nseg_desc; 5125 struct desc_struct nseg_desc;
5125 int ret = 0; 5126 int ret = 0;
5126 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5127 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5127 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5128 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5128 5129
5129 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); 5130 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5130 5131
5131 /* FIXME: Handle errors. Failure to read either TSS or their 5132 /* FIXME: Handle errors. Failure to read either TSS or their
5132 * descriptors should generate a pagefault. 5133 * descriptors should generate a pagefault.
5133 */ 5134 */
5134 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) 5135 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
5135 goto out; 5136 goto out;
5136 5137
5137 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) 5138 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
5138 goto out; 5139 goto out;
5139 5140
5140 if (reason != TASK_SWITCH_IRET) { 5141 if (reason != TASK_SWITCH_IRET) {
5141 int cpl; 5142 int cpl;
5142 5143
5143 cpl = kvm_x86_ops->get_cpl(vcpu); 5144 cpl = kvm_x86_ops->get_cpl(vcpu);
5144 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { 5145 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
5145 kvm_queue_exception_e(vcpu, GP_VECTOR, 0); 5146 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
5146 return 1; 5147 return 1;
5147 } 5148 }
5148 } 5149 }
5149 5150
5150 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { 5151 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
5151 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 5152 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5152 return 1; 5153 return 1;
5153 } 5154 }
5154 5155
5155 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { 5156 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
5156 cseg_desc.type &= ~(1 << 1); //clear the B flag 5157 cseg_desc.type &= ~(1 << 1); //clear the B flag
5157 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); 5158 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
5158 } 5159 }
5159 5160
5160 if (reason == TASK_SWITCH_IRET) { 5161 if (reason == TASK_SWITCH_IRET) {
5161 u32 eflags = kvm_get_rflags(vcpu); 5162 u32 eflags = kvm_get_rflags(vcpu);
5162 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); 5163 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
5163 } 5164 }
5164 5165
5165 /* set back link to prev task only if NT bit is set in eflags 5166 /* set back link to prev task only if NT bit is set in eflags
5166 note that old_tss_sel is not used after this point */ 5167 note that old_tss_sel is not used after this point */
5167 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) 5168 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
5168 old_tss_sel = 0xffff; 5169 old_tss_sel = 0xffff;
5169 5170
5170 if (nseg_desc.type & 8) 5171 if (nseg_desc.type & 8)
5171 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, 5172 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
5172 old_tss_base, &nseg_desc); 5173 old_tss_base, &nseg_desc);
5173 else 5174 else
5174 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, 5175 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
5175 old_tss_base, &nseg_desc); 5176 old_tss_base, &nseg_desc);
5176 5177
5177 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { 5178 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
5178 u32 eflags = kvm_get_rflags(vcpu); 5179 u32 eflags = kvm_get_rflags(vcpu);
5179 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); 5180 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
5180 } 5181 }
5181 5182
5182 if (reason != TASK_SWITCH_IRET) { 5183 if (reason != TASK_SWITCH_IRET) {
5183 nseg_desc.type |= (1 << 1); 5184 nseg_desc.type |= (1 << 1);
5184 save_guest_segment_descriptor(vcpu, tss_selector, 5185 save_guest_segment_descriptor(vcpu, tss_selector,
5185 &nseg_desc); 5186 &nseg_desc);
5186 } 5187 }
5187 5188
5188 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); 5189 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
5189 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5190 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
5190 tr_seg.type = 11; 5191 tr_seg.type = 11;
5191 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 5192 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
5192 out: 5193 out:
5193 return ret; 5194 return ret;
5194 } 5195 }
5195 EXPORT_SYMBOL_GPL(kvm_task_switch); 5196 EXPORT_SYMBOL_GPL(kvm_task_switch);
5196 5197
5197 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 5198 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5198 struct kvm_sregs *sregs) 5199 struct kvm_sregs *sregs)
5199 { 5200 {
5200 int mmu_reset_needed = 0; 5201 int mmu_reset_needed = 0;
5201 int pending_vec, max_bits; 5202 int pending_vec, max_bits;
5202 struct descriptor_table dt; 5203 struct descriptor_table dt;
5203 5204
5204 vcpu_load(vcpu); 5205 vcpu_load(vcpu);
5205 5206
5206 dt.limit = sregs->idt.limit; 5207 dt.limit = sregs->idt.limit;
5207 dt.base = sregs->idt.base; 5208 dt.base = sregs->idt.base;
5208 kvm_x86_ops->set_idt(vcpu, &dt); 5209 kvm_x86_ops->set_idt(vcpu, &dt);
5209 dt.limit = sregs->gdt.limit; 5210 dt.limit = sregs->gdt.limit;
5210 dt.base = sregs->gdt.base; 5211 dt.base = sregs->gdt.base;
5211 kvm_x86_ops->set_gdt(vcpu, &dt); 5212 kvm_x86_ops->set_gdt(vcpu, &dt);
5212 5213
5213 vcpu->arch.cr2 = sregs->cr2; 5214 vcpu->arch.cr2 = sregs->cr2;
5214 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; 5215 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
5215 vcpu->arch.cr3 = sregs->cr3; 5216 vcpu->arch.cr3 = sregs->cr3;
5216 5217
5217 kvm_set_cr8(vcpu, sregs->cr8); 5218 kvm_set_cr8(vcpu, sregs->cr8);
5218 5219
5219 mmu_reset_needed |= vcpu->arch.efer != sregs->efer; 5220 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
5220 kvm_x86_ops->set_efer(vcpu, sregs->efer); 5221 kvm_x86_ops->set_efer(vcpu, sregs->efer);
5221 kvm_set_apic_base(vcpu, sregs->apic_base); 5222 kvm_set_apic_base(vcpu, sregs->apic_base);
5222 5223
5223 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; 5224 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
5224 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 5225 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
5225 vcpu->arch.cr0 = sregs->cr0; 5226 vcpu->arch.cr0 = sregs->cr0;
5226 5227
5227 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; 5228 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5228 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5229 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5229 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5230 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5230 load_pdptrs(vcpu, vcpu->arch.cr3); 5231 load_pdptrs(vcpu, vcpu->arch.cr3);
5231 mmu_reset_needed = 1; 5232 mmu_reset_needed = 1;
5232 } 5233 }
5233 5234
5234 if (mmu_reset_needed) 5235 if (mmu_reset_needed)
5235 kvm_mmu_reset_context(vcpu); 5236 kvm_mmu_reset_context(vcpu);
5236 5237
5237 max_bits = (sizeof sregs->interrupt_bitmap) << 3; 5238 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
5238 pending_vec = find_first_bit( 5239 pending_vec = find_first_bit(
5239 (const unsigned long *)sregs->interrupt_bitmap, max_bits); 5240 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
5240 if (pending_vec < max_bits) { 5241 if (pending_vec < max_bits) {
5241 kvm_queue_interrupt(vcpu, pending_vec, false); 5242 kvm_queue_interrupt(vcpu, pending_vec, false);
5242 pr_debug("Set back pending irq %d\n", pending_vec); 5243 pr_debug("Set back pending irq %d\n", pending_vec);
5243 if (irqchip_in_kernel(vcpu->kvm)) 5244 if (irqchip_in_kernel(vcpu->kvm))
5244 kvm_pic_clear_isr_ack(vcpu->kvm); 5245 kvm_pic_clear_isr_ack(vcpu->kvm);
5245 } 5246 }
5246 5247
5247 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 5248 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5248 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 5249 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5249 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); 5250 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5250 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 5251 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5251 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 5252 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5252 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 5253 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5253 5254
5254 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 5255 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5255 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 5256 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5256 5257
5257 update_cr8_intercept(vcpu); 5258 update_cr8_intercept(vcpu);
5258 5259
5259 /* Older userspace won't unhalt the vcpu on reset. */ 5260 /* Older userspace won't unhalt the vcpu on reset. */
5260 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5261 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
5261 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5262 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
5262 !is_protmode(vcpu)) 5263 !is_protmode(vcpu))
5263 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5264 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5264 5265
5265 vcpu_put(vcpu); 5266 vcpu_put(vcpu);
5266 5267
5267 return 0; 5268 return 0;
5268 } 5269 }
5269 5270
5270 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 5271 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5271 struct kvm_guest_debug *dbg) 5272 struct kvm_guest_debug *dbg)
5272 { 5273 {
5273 unsigned long rflags; 5274 unsigned long rflags;
5274 int i, r; 5275 int i, r;
5275 5276
5276 vcpu_load(vcpu); 5277 vcpu_load(vcpu);
5277 5278
5278 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { 5279 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
5279 r = -EBUSY; 5280 r = -EBUSY;
5280 if (vcpu->arch.exception.pending) 5281 if (vcpu->arch.exception.pending)
5281 goto unlock_out; 5282 goto unlock_out;
5282 if (dbg->control & KVM_GUESTDBG_INJECT_DB) 5283 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5283 kvm_queue_exception(vcpu, DB_VECTOR); 5284 kvm_queue_exception(vcpu, DB_VECTOR);
5284 else 5285 else
5285 kvm_queue_exception(vcpu, BP_VECTOR); 5286 kvm_queue_exception(vcpu, BP_VECTOR);
5286 } 5287 }
5287 5288
5288 /* 5289 /*
5289 * Read rflags as long as potentially injected trace flags are still 5290 * Read rflags as long as potentially injected trace flags are still
5290 * filtered out. 5291 * filtered out.
5291 */ 5292 */
5292 rflags = kvm_get_rflags(vcpu); 5293 rflags = kvm_get_rflags(vcpu);
5293 5294
5294 vcpu->guest_debug = dbg->control; 5295 vcpu->guest_debug = dbg->control;
5295 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) 5296 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
5296 vcpu->guest_debug = 0; 5297 vcpu->guest_debug = 0;
5297 5298
5298 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { 5299 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5299 for (i = 0; i < KVM_NR_DB_REGS; ++i) 5300 for (i = 0; i < KVM_NR_DB_REGS; ++i)
5300 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; 5301 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
5301 vcpu->arch.switch_db_regs = 5302 vcpu->arch.switch_db_regs =
5302 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); 5303 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
5303 } else { 5304 } else {
5304 for (i = 0; i < KVM_NR_DB_REGS; i++) 5305 for (i = 0; i < KVM_NR_DB_REGS; i++)
5305 vcpu->arch.eff_db[i] = vcpu->arch.db[i]; 5306 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
5306 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 5307 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5307 } 5308 }
5308 5309
5309 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 5310 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5310 vcpu->arch.singlestep_cs = 5311 vcpu->arch.singlestep_cs =
5311 get_segment_selector(vcpu, VCPU_SREG_CS); 5312 get_segment_selector(vcpu, VCPU_SREG_CS);
5312 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); 5313 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
5313 } 5314 }
5314 5315
5315 /* 5316 /*
5316 * Trigger an rflags update that will inject or remove the trace 5317 * Trigger an rflags update that will inject or remove the trace
5317 * flags. 5318 * flags.
5318 */ 5319 */
5319 kvm_set_rflags(vcpu, rflags); 5320 kvm_set_rflags(vcpu, rflags);
5320 5321
5321 kvm_x86_ops->set_guest_debug(vcpu, dbg); 5322 kvm_x86_ops->set_guest_debug(vcpu, dbg);
5322 5323
5323 r = 0; 5324 r = 0;
5324 5325
5325 unlock_out: 5326 unlock_out:
5326 vcpu_put(vcpu); 5327 vcpu_put(vcpu);
5327 5328
5328 return r; 5329 return r;
5329 } 5330 }
5330 5331
5331 /* 5332 /*
5332 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when 5333 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
5333 * we have asm/x86/processor.h 5334 * we have asm/x86/processor.h
5334 */ 5335 */
5335 struct fxsave { 5336 struct fxsave {
5336 u16 cwd; 5337 u16 cwd;
5337 u16 swd; 5338 u16 swd;
5338 u16 twd; 5339 u16 twd;
5339 u16 fop; 5340 u16 fop;
5340 u64 rip; 5341 u64 rip;
5341 u64 rdp; 5342 u64 rdp;
5342 u32 mxcsr; 5343 u32 mxcsr;
5343 u32 mxcsr_mask; 5344 u32 mxcsr_mask;
5344 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ 5345 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
5345 #ifdef CONFIG_X86_64 5346 #ifdef CONFIG_X86_64
5346 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ 5347 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
5347 #else 5348 #else
5348 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ 5349 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
5349 #endif 5350 #endif
5350 }; 5351 };
5351 5352
5352 /* 5353 /*
5353 * Translate a guest virtual address to a guest physical address. 5354 * Translate a guest virtual address to a guest physical address.
5354 */ 5355 */
5355 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 5356 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5356 struct kvm_translation *tr) 5357 struct kvm_translation *tr)
5357 { 5358 {
5358 unsigned long vaddr = tr->linear_address; 5359 unsigned long vaddr = tr->linear_address;
5359 gpa_t gpa; 5360 gpa_t gpa;
5360 int idx; 5361 int idx;
5361 5362
5362 vcpu_load(vcpu); 5363 vcpu_load(vcpu);
5363 idx = srcu_read_lock(&vcpu->kvm->srcu); 5364 idx = srcu_read_lock(&vcpu->kvm->srcu);
5364 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); 5365 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
5365 srcu_read_unlock(&vcpu->kvm->srcu, idx); 5366 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5366 tr->physical_address = gpa; 5367 tr->physical_address = gpa;
5367 tr->valid = gpa != UNMAPPED_GVA; 5368 tr->valid = gpa != UNMAPPED_GVA;
5368 tr->writeable = 1; 5369 tr->writeable = 1;
5369 tr->usermode = 0; 5370 tr->usermode = 0;
5370 vcpu_put(vcpu); 5371 vcpu_put(vcpu);
5371 5372
5372 return 0; 5373 return 0;
5373 } 5374 }
5374 5375
5375 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5376 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5376 { 5377 {
5377 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5378 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
5378 5379
5379 vcpu_load(vcpu); 5380 vcpu_load(vcpu);
5380 5381
5381 memcpy(fpu->fpr, fxsave->st_space, 128); 5382 memcpy(fpu->fpr, fxsave->st_space, 128);
5382 fpu->fcw = fxsave->cwd; 5383 fpu->fcw = fxsave->cwd;
5383 fpu->fsw = fxsave->swd; 5384 fpu->fsw = fxsave->swd;
5384 fpu->ftwx = fxsave->twd; 5385 fpu->ftwx = fxsave->twd;
5385 fpu->last_opcode = fxsave->fop; 5386 fpu->last_opcode = fxsave->fop;
5386 fpu->last_ip = fxsave->rip; 5387 fpu->last_ip = fxsave->rip;
5387 fpu->last_dp = fxsave->rdp; 5388 fpu->last_dp = fxsave->rdp;
5388 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); 5389 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
5389 5390
5390 vcpu_put(vcpu); 5391 vcpu_put(vcpu);
5391 5392
5392 return 0; 5393 return 0;
5393 } 5394 }
5394 5395
5395 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 5396 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5396 { 5397 {
5397 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; 5398 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
5398 5399
5399 vcpu_load(vcpu); 5400 vcpu_load(vcpu);
5400 5401
5401 memcpy(fxsave->st_space, fpu->fpr, 128); 5402 memcpy(fxsave->st_space, fpu->fpr, 128);
5402 fxsave->cwd = fpu->fcw; 5403 fxsave->cwd = fpu->fcw;
5403 fxsave->swd = fpu->fsw; 5404 fxsave->swd = fpu->fsw;
5404 fxsave->twd = fpu->ftwx; 5405 fxsave->twd = fpu->ftwx;
5405 fxsave->fop = fpu->last_opcode; 5406 fxsave->fop = fpu->last_opcode;
5406 fxsave->rip = fpu->last_ip; 5407 fxsave->rip = fpu->last_ip;
5407 fxsave->rdp = fpu->last_dp; 5408 fxsave->rdp = fpu->last_dp;
5408 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); 5409 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
5409 5410
5410 vcpu_put(vcpu); 5411 vcpu_put(vcpu);
5411 5412
5412 return 0; 5413 return 0;
5413 } 5414 }
5414 5415
5415 void fx_init(struct kvm_vcpu *vcpu) 5416 void fx_init(struct kvm_vcpu *vcpu)
5416 { 5417 {
5417 unsigned after_mxcsr_mask; 5418 unsigned after_mxcsr_mask;
5418 5419
5419 /* 5420 /*
5420 * Touch the fpu the first time in a non-atomic context: if 5421 * Touch the fpu the first time in a non-atomic context: if
5421 * this is the first fpu instruction, the exception handler 5422 * this is the first fpu instruction, the exception handler
5422 * will fire before the instruction returns and it'll have to 5423 * will fire before the instruction returns and it'll have to
5423 * allocate ram with GFP_KERNEL. 5424 * allocate ram with GFP_KERNEL.
5424 */ 5425 */
5425 if (!used_math()) 5426 if (!used_math())
5426 kvm_fx_save(&vcpu->arch.host_fx_image); 5427 kvm_fx_save(&vcpu->arch.host_fx_image);
5427 5428
5428 /* Initialize guest FPU by resetting ours and saving into guest's */ 5429 /* Initialize guest FPU by resetting ours and saving into guest's */
5429 preempt_disable(); 5430 preempt_disable();
5430 kvm_fx_save(&vcpu->arch.host_fx_image); 5431 kvm_fx_save(&vcpu->arch.host_fx_image);
5431 kvm_fx_finit(); 5432 kvm_fx_finit();
5432 kvm_fx_save(&vcpu->arch.guest_fx_image); 5433 kvm_fx_save(&vcpu->arch.guest_fx_image);
5433 kvm_fx_restore(&vcpu->arch.host_fx_image); 5434 kvm_fx_restore(&vcpu->arch.host_fx_image);
5434 preempt_enable(); 5435 preempt_enable();
5435 5436
5436 vcpu->arch.cr0 |= X86_CR0_ET; 5437 vcpu->arch.cr0 |= X86_CR0_ET;
5437 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); 5438 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
5438 vcpu->arch.guest_fx_image.mxcsr = 0x1f80; 5439 vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
5439 memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, 5440 memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
5440 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); 5441 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
5441 } 5442 }
5442 EXPORT_SYMBOL_GPL(fx_init); 5443 EXPORT_SYMBOL_GPL(fx_init);
5443 5444
5444 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5445 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
5445 { 5446 {
5446 if (vcpu->guest_fpu_loaded) 5447 if (vcpu->guest_fpu_loaded)
5447 return; 5448 return;
5448 5449
5449 vcpu->guest_fpu_loaded = 1; 5450 vcpu->guest_fpu_loaded = 1;
5450 kvm_fx_save(&vcpu->arch.host_fx_image); 5451 kvm_fx_save(&vcpu->arch.host_fx_image);
5451 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5452 kvm_fx_restore(&vcpu->arch.guest_fx_image);
5452 trace_kvm_fpu(1); 5453 trace_kvm_fpu(1);
5453 } 5454 }
5454 5455
5455 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5456 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
5456 { 5457 {
5457 if (!vcpu->guest_fpu_loaded) 5458 if (!vcpu->guest_fpu_loaded)
5458 return; 5459 return;
5459 5460
5460 vcpu->guest_fpu_loaded = 0; 5461 vcpu->guest_fpu_loaded = 0;
5461 kvm_fx_save(&vcpu->arch.guest_fx_image); 5462 kvm_fx_save(&vcpu->arch.guest_fx_image);
5462 kvm_fx_restore(&vcpu->arch.host_fx_image); 5463 kvm_fx_restore(&vcpu->arch.host_fx_image);
5463 ++vcpu->stat.fpu_reload; 5464 ++vcpu->stat.fpu_reload;
5464 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); 5465 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
5465 trace_kvm_fpu(0); 5466 trace_kvm_fpu(0);
5466 } 5467 }
5467 5468
5468 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5469 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
5469 { 5470 {
5470 if (vcpu->arch.time_page) { 5471 if (vcpu->arch.time_page) {
5471 kvm_release_page_dirty(vcpu->arch.time_page); 5472 kvm_release_page_dirty(vcpu->arch.time_page);
5472 vcpu->arch.time_page = NULL; 5473 vcpu->arch.time_page = NULL;
5473 } 5474 }
5474 5475
5475 kvm_x86_ops->vcpu_free(vcpu); 5476 kvm_x86_ops->vcpu_free(vcpu);
5476 } 5477 }
5477 5478
5478 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, 5479 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
5479 unsigned int id) 5480 unsigned int id)
5480 { 5481 {
5481 return kvm_x86_ops->vcpu_create(kvm, id); 5482 return kvm_x86_ops->vcpu_create(kvm, id);
5482 } 5483 }
5483 5484
5484 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) 5485 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
5485 { 5486 {
5486 int r; 5487 int r;
5487 5488
5488 /* We do fxsave: this must be aligned. */ 5489 /* We do fxsave: this must be aligned. */
5489 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); 5490 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
5490 5491
5491 vcpu->arch.mtrr_state.have_fixed = 1; 5492 vcpu->arch.mtrr_state.have_fixed = 1;
5492 vcpu_load(vcpu); 5493 vcpu_load(vcpu);
5493 r = kvm_arch_vcpu_reset(vcpu); 5494 r = kvm_arch_vcpu_reset(vcpu);
5494 if (r == 0) 5495 if (r == 0)
5495 r = kvm_mmu_setup(vcpu); 5496 r = kvm_mmu_setup(vcpu);
5496 vcpu_put(vcpu); 5497 vcpu_put(vcpu);
5497 if (r < 0) 5498 if (r < 0)
5498 goto free_vcpu; 5499 goto free_vcpu;
5499 5500
5500 return 0; 5501 return 0;
5501 free_vcpu: 5502 free_vcpu:
5502 kvm_x86_ops->vcpu_free(vcpu); 5503 kvm_x86_ops->vcpu_free(vcpu);
5503 return r; 5504 return r;
5504 } 5505 }
5505 5506
5506 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 5507 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
5507 { 5508 {
5508 vcpu_load(vcpu); 5509 vcpu_load(vcpu);
5509 kvm_mmu_unload(vcpu); 5510 kvm_mmu_unload(vcpu);
5510 vcpu_put(vcpu); 5511 vcpu_put(vcpu);
5511 5512
5512 kvm_x86_ops->vcpu_free(vcpu); 5513 kvm_x86_ops->vcpu_free(vcpu);
5513 } 5514 }
5514 5515
5515 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) 5516 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
5516 { 5517 {
5517 vcpu->arch.nmi_pending = false; 5518 vcpu->arch.nmi_pending = false;
5518 vcpu->arch.nmi_injected = false; 5519 vcpu->arch.nmi_injected = false;
5519 5520
5520 vcpu->arch.switch_db_regs = 0; 5521 vcpu->arch.switch_db_regs = 0;
5521 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 5522 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
5522 vcpu->arch.dr6 = DR6_FIXED_1; 5523 vcpu->arch.dr6 = DR6_FIXED_1;
5523 vcpu->arch.dr7 = DR7_FIXED_1; 5524 vcpu->arch.dr7 = DR7_FIXED_1;
5524 5525
5525 return kvm_x86_ops->vcpu_reset(vcpu); 5526 return kvm_x86_ops->vcpu_reset(vcpu);
5526 } 5527 }
5527 5528
5528 int kvm_arch_hardware_enable(void *garbage) 5529 int kvm_arch_hardware_enable(void *garbage)
5529 { 5530 {
5530 /* 5531 /*
5531 * Since this may be called from a hotplug notification, 5532 * Since this may be called from a hotplug notification,
5532 * we can't get the CPU frequency directly. 5533 * we can't get the CPU frequency directly.
5533 */ 5534 */
5534 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { 5535 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5535 int cpu = raw_smp_processor_id(); 5536 int cpu = raw_smp_processor_id();
5536 per_cpu(cpu_tsc_khz, cpu) = 0; 5537 per_cpu(cpu_tsc_khz, cpu) = 0;
5537 } 5538 }
5538 5539
5539 kvm_shared_msr_cpu_online(); 5540 kvm_shared_msr_cpu_online();
5540 5541
5541 return kvm_x86_ops->hardware_enable(garbage); 5542 return kvm_x86_ops->hardware_enable(garbage);
5542 } 5543 }
5543 5544
5544 void kvm_arch_hardware_disable(void *garbage) 5545 void kvm_arch_hardware_disable(void *garbage)
5545 { 5546 {
5546 kvm_x86_ops->hardware_disable(garbage); 5547 kvm_x86_ops->hardware_disable(garbage);
5547 drop_user_return_notifiers(garbage); 5548 drop_user_return_notifiers(garbage);
5548 } 5549 }
5549 5550
5550 int kvm_arch_hardware_setup(void) 5551 int kvm_arch_hardware_setup(void)
5551 { 5552 {
5552 return kvm_x86_ops->hardware_setup(); 5553 return kvm_x86_ops->hardware_setup();
5553 } 5554 }
5554 5555
5555 void kvm_arch_hardware_unsetup(void) 5556 void kvm_arch_hardware_unsetup(void)
5556 { 5557 {
5557 kvm_x86_ops->hardware_unsetup(); 5558 kvm_x86_ops->hardware_unsetup();
5558 } 5559 }
5559 5560
5560 void kvm_arch_check_processor_compat(void *rtn) 5561 void kvm_arch_check_processor_compat(void *rtn)
5561 { 5562 {
5562 kvm_x86_ops->check_processor_compatibility(rtn); 5563 kvm_x86_ops->check_processor_compatibility(rtn);
5563 } 5564 }
5564 5565
5565 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 5566 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5566 { 5567 {
5567 struct page *page; 5568 struct page *page;
5568 struct kvm *kvm; 5569 struct kvm *kvm;
5569 int r; 5570 int r;
5570 5571
5571 BUG_ON(vcpu->kvm == NULL); 5572 BUG_ON(vcpu->kvm == NULL);
5572 kvm = vcpu->kvm; 5573 kvm = vcpu->kvm;
5573 5574
5574 vcpu->arch.mmu.root_hpa = INVALID_PAGE; 5575 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
5575 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) 5576 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
5576 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5577 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5577 else 5578 else
5578 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; 5579 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
5579 5580
5580 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 5581 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
5581 if (!page) { 5582 if (!page) {
5582 r = -ENOMEM; 5583 r = -ENOMEM;
5583 goto fail; 5584 goto fail;
5584 } 5585 }
5585 vcpu->arch.pio_data = page_address(page); 5586 vcpu->arch.pio_data = page_address(page);
5586 5587
5587 r = kvm_mmu_create(vcpu); 5588 r = kvm_mmu_create(vcpu);
5588 if (r < 0) 5589 if (r < 0)
5589 goto fail_free_pio_data; 5590 goto fail_free_pio_data;
5590 5591
5591 if (irqchip_in_kernel(kvm)) { 5592 if (irqchip_in_kernel(kvm)) {
5592 r = kvm_create_lapic(vcpu); 5593 r = kvm_create_lapic(vcpu);
5593 if (r < 0) 5594 if (r < 0)
5594 goto fail_mmu_destroy; 5595 goto fail_mmu_destroy;
5595 } 5596 }
5596 5597
5597 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, 5598 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
5598 GFP_KERNEL); 5599 GFP_KERNEL);
5599 if (!vcpu->arch.mce_banks) { 5600 if (!vcpu->arch.mce_banks) {
5600 r = -ENOMEM; 5601 r = -ENOMEM;
5601 goto fail_free_lapic; 5602 goto fail_free_lapic;
5602 } 5603 }
5603 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5604 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
5604 5605
5605 return 0; 5606 return 0;
5606 fail_free_lapic: 5607 fail_free_lapic:
5607 kvm_free_lapic(vcpu); 5608 kvm_free_lapic(vcpu);
5608 fail_mmu_destroy: 5609 fail_mmu_destroy:
5609 kvm_mmu_destroy(vcpu); 5610 kvm_mmu_destroy(vcpu);
5610 fail_free_pio_data: 5611 fail_free_pio_data:
5611 free_page((unsigned long)vcpu->arch.pio_data); 5612 free_page((unsigned long)vcpu->arch.pio_data);
5612 fail: 5613 fail:
5613 return r; 5614 return r;
5614 } 5615 }
5615 5616
5616 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5617 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5617 { 5618 {
5618 int idx; 5619 int idx;
5619 5620
5620 kfree(vcpu->arch.mce_banks); 5621 kfree(vcpu->arch.mce_banks);
5621 kvm_free_lapic(vcpu); 5622 kvm_free_lapic(vcpu);
5622 idx = srcu_read_lock(&vcpu->kvm->srcu); 5623 idx = srcu_read_lock(&vcpu->kvm->srcu);
5623 kvm_mmu_destroy(vcpu); 5624 kvm_mmu_destroy(vcpu);
5624 srcu_read_unlock(&vcpu->kvm->srcu, idx); 5625 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5625 free_page((unsigned long)vcpu->arch.pio_data); 5626 free_page((unsigned long)vcpu->arch.pio_data);
5626 } 5627 }
5627 5628
5628 struct kvm *kvm_arch_create_vm(void) 5629 struct kvm *kvm_arch_create_vm(void)
5629 { 5630 {
5630 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 5631 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
5631 5632
5632 if (!kvm) 5633 if (!kvm)
5633 return ERR_PTR(-ENOMEM); 5634 return ERR_PTR(-ENOMEM);
5634 5635
5635 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); 5636 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5636 if (!kvm->arch.aliases) { 5637 if (!kvm->arch.aliases) {
5637 kfree(kvm); 5638 kfree(kvm);
5638 return ERR_PTR(-ENOMEM); 5639 return ERR_PTR(-ENOMEM);
5639 } 5640 }
5640 5641
5641 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5642 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5642 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5643 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5643 5644
5644 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 5645 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
5645 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 5646 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
5646 5647
5647 rdtscll(kvm->arch.vm_init_tsc); 5648 rdtscll(kvm->arch.vm_init_tsc);
5648 5649
5649 return kvm; 5650 return kvm;
5650 } 5651 }
5651 5652
5652 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) 5653 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
5653 { 5654 {
5654 vcpu_load(vcpu); 5655 vcpu_load(vcpu);
5655 kvm_mmu_unload(vcpu); 5656 kvm_mmu_unload(vcpu);
5656 vcpu_put(vcpu); 5657 vcpu_put(vcpu);
5657 } 5658 }
5658 5659
5659 static void kvm_free_vcpus(struct kvm *kvm) 5660 static void kvm_free_vcpus(struct kvm *kvm)
5660 { 5661 {
5661 unsigned int i; 5662 unsigned int i;
5662 struct kvm_vcpu *vcpu; 5663 struct kvm_vcpu *vcpu;
5663 5664
5664 /* 5665 /*
5665 * Unpin any mmu pages first. 5666 * Unpin any mmu pages first.
5666 */ 5667 */
5667 kvm_for_each_vcpu(i, vcpu, kvm) 5668 kvm_for_each_vcpu(i, vcpu, kvm)
5668 kvm_unload_vcpu_mmu(vcpu); 5669 kvm_unload_vcpu_mmu(vcpu);
5669 kvm_for_each_vcpu(i, vcpu, kvm) 5670 kvm_for_each_vcpu(i, vcpu, kvm)
5670 kvm_arch_vcpu_free(vcpu); 5671 kvm_arch_vcpu_free(vcpu);
5671 5672
5672 mutex_lock(&kvm->lock); 5673 mutex_lock(&kvm->lock);
5673 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) 5674 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
5674 kvm->vcpus[i] = NULL; 5675 kvm->vcpus[i] = NULL;
5675 5676
5676 atomic_set(&kvm->online_vcpus, 0); 5677 atomic_set(&kvm->online_vcpus, 0);
5677 mutex_unlock(&kvm->lock); 5678 mutex_unlock(&kvm->lock);
5678 } 5679 }
5679 5680
5680 void kvm_arch_sync_events(struct kvm *kvm) 5681 void kvm_arch_sync_events(struct kvm *kvm)
5681 { 5682 {
5682 kvm_free_all_assigned_devices(kvm); 5683 kvm_free_all_assigned_devices(kvm);
5683 } 5684 }
5684 5685
5685 void kvm_arch_destroy_vm(struct kvm *kvm) 5686 void kvm_arch_destroy_vm(struct kvm *kvm)
5686 { 5687 {
5687 kvm_iommu_unmap_guest(kvm); 5688 kvm_iommu_unmap_guest(kvm);
5688 kvm_free_pit(kvm); 5689 kvm_free_pit(kvm);
5689 kfree(kvm->arch.vpic); 5690 kfree(kvm->arch.vpic);
5690 kfree(kvm->arch.vioapic); 5691 kfree(kvm->arch.vioapic);
5691 kvm_free_vcpus(kvm); 5692 kvm_free_vcpus(kvm);
5692 kvm_free_physmem(kvm); 5693 kvm_free_physmem(kvm);
5693 if (kvm->arch.apic_access_page) 5694 if (kvm->arch.apic_access_page)
5694 put_page(kvm->arch.apic_access_page); 5695 put_page(kvm->arch.apic_access_page);
5695 if (kvm->arch.ept_identity_pagetable) 5696 if (kvm->arch.ept_identity_pagetable)
5696 put_page(kvm->arch.ept_identity_pagetable); 5697 put_page(kvm->arch.ept_identity_pagetable);
5697 cleanup_srcu_struct(&kvm->srcu); 5698 cleanup_srcu_struct(&kvm->srcu);
5698 kfree(kvm->arch.aliases); 5699 kfree(kvm->arch.aliases);
5699 kfree(kvm); 5700 kfree(kvm);
5700 } 5701 }
5701 5702
5702 int kvm_arch_prepare_memory_region(struct kvm *kvm, 5703 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5703 struct kvm_memory_slot *memslot, 5704 struct kvm_memory_slot *memslot,
5704 struct kvm_memory_slot old, 5705 struct kvm_memory_slot old,
5705 struct kvm_userspace_memory_region *mem, 5706 struct kvm_userspace_memory_region *mem,
5706 int user_alloc) 5707 int user_alloc)
5707 { 5708 {
5708 int npages = memslot->npages; 5709 int npages = memslot->npages;
5709 5710
5710 /* To keep backward compatibility with older userspace, 5711 /* To keep backward compatibility with older userspace,
5711 * x86 needs to handle the !user_alloc case. 5712 * x86 needs to handle the !user_alloc case.
5712 */ 5713 */
5713 if (!user_alloc) { 5714 if (!user_alloc) {
5714 if (npages && !old.rmap) { 5715 if (npages && !old.rmap) {
5715 unsigned long userspace_addr; 5716 unsigned long userspace_addr;
5716 5717
5717 down_write(&current->mm->mmap_sem); 5718 down_write(&current->mm->mmap_sem);
5718 userspace_addr = do_mmap(NULL, 0, 5719 userspace_addr = do_mmap(NULL, 0,
5719 npages * PAGE_SIZE, 5720 npages * PAGE_SIZE,
5720 PROT_READ | PROT_WRITE, 5721 PROT_READ | PROT_WRITE,
5721 MAP_PRIVATE | MAP_ANONYMOUS, 5722 MAP_PRIVATE | MAP_ANONYMOUS,
5722 0); 5723 0);
5723 up_write(&current->mm->mmap_sem); 5724 up_write(&current->mm->mmap_sem);
5724 5725
5725 if (IS_ERR((void *)userspace_addr)) 5726 if (IS_ERR((void *)userspace_addr))
5726 return PTR_ERR((void *)userspace_addr); 5727 return PTR_ERR((void *)userspace_addr);
5727 5728
5728 memslot->userspace_addr = userspace_addr; 5729 memslot->userspace_addr = userspace_addr;
5729 } 5730 }
5730 } 5731 }
5731 5732
5732 5733
5733 return 0; 5734 return 0;
5734 } 5735 }
5735 5736
5736 void kvm_arch_commit_memory_region(struct kvm *kvm, 5737 void kvm_arch_commit_memory_region(struct kvm *kvm,
5737 struct kvm_userspace_memory_region *mem, 5738 struct kvm_userspace_memory_region *mem,
5738 struct kvm_memory_slot old, 5739 struct kvm_memory_slot old,
5739 int user_alloc) 5740 int user_alloc)
5740 { 5741 {
5741 5742
5742 int npages = mem->memory_size >> PAGE_SHIFT; 5743 int npages = mem->memory_size >> PAGE_SHIFT;
5743 5744
5744 if (!user_alloc && !old.user_alloc && old.rmap && !npages) { 5745 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
5745 int ret; 5746 int ret;
5746 5747
5747 down_write(&current->mm->mmap_sem); 5748 down_write(&current->mm->mmap_sem);
5748 ret = do_munmap(current->mm, old.userspace_addr, 5749 ret = do_munmap(current->mm, old.userspace_addr,
5749 old.npages * PAGE_SIZE); 5750 old.npages * PAGE_SIZE);
5750 up_write(&current->mm->mmap_sem); 5751 up_write(&current->mm->mmap_sem);
5751 if (ret < 0) 5752 if (ret < 0)
5752 printk(KERN_WARNING 5753 printk(KERN_WARNING
5753 "kvm_vm_ioctl_set_memory_region: " 5754 "kvm_vm_ioctl_set_memory_region: "
5754 "failed to munmap memory\n"); 5755 "failed to munmap memory\n");
5755 } 5756 }
5756 5757
5757 spin_lock(&kvm->mmu_lock); 5758 spin_lock(&kvm->mmu_lock);
5758 if (!kvm->arch.n_requested_mmu_pages) { 5759 if (!kvm->arch.n_requested_mmu_pages) {
5759 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 5760 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
5760 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); 5761 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
5761 } 5762 }
5762 5763
5763 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5764 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
5764 spin_unlock(&kvm->mmu_lock); 5765 spin_unlock(&kvm->mmu_lock);
5765 } 5766 }
5766 5767
5767 void kvm_arch_flush_shadow(struct kvm *kvm) 5768 void kvm_arch_flush_shadow(struct kvm *kvm)
5768 { 5769 {
5769 kvm_mmu_zap_all(kvm); 5770 kvm_mmu_zap_all(kvm);
5770 kvm_reload_remote_mmus(kvm); 5771 kvm_reload_remote_mmus(kvm);
5771 } 5772 }
5772 5773
5773 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 5774 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
5774 { 5775 {
5775 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 5776 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
5776 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED 5777 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
5777 || vcpu->arch.nmi_pending || 5778 || vcpu->arch.nmi_pending ||
5778 (kvm_arch_interrupt_allowed(vcpu) && 5779 (kvm_arch_interrupt_allowed(vcpu) &&
5779 kvm_cpu_has_interrupt(vcpu)); 5780 kvm_cpu_has_interrupt(vcpu));
5780 } 5781 }
5781 5782
5782 void kvm_vcpu_kick(struct kvm_vcpu *vcpu) 5783 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
5783 { 5784 {
5784 int me; 5785 int me;
5785 int cpu = vcpu->cpu; 5786 int cpu = vcpu->cpu;
5786 5787
5787 if (waitqueue_active(&vcpu->wq)) { 5788 if (waitqueue_active(&vcpu->wq)) {
5788 wake_up_interruptible(&vcpu->wq); 5789 wake_up_interruptible(&vcpu->wq);
5789 ++vcpu->stat.halt_wakeup; 5790 ++vcpu->stat.halt_wakeup;
5790 } 5791 }
5791 5792
5792 me = get_cpu(); 5793 me = get_cpu();
5793 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) 5794 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
5794 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) 5795 if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
5795 smp_send_reschedule(cpu); 5796 smp_send_reschedule(cpu);
5796 put_cpu(); 5797 put_cpu();
5797 } 5798 }
5798 5799
5799 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) 5800 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
5800 { 5801 {
5801 return kvm_x86_ops->interrupt_allowed(vcpu); 5802 return kvm_x86_ops->interrupt_allowed(vcpu);
5802 } 5803 }
5803 5804
5804 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) 5805 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5805 { 5806 {
5806 unsigned long rflags; 5807 unsigned long rflags;
5807 5808
5808 rflags = kvm_x86_ops->get_rflags(vcpu); 5809 rflags = kvm_x86_ops->get_rflags(vcpu);
5809 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5810 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5810 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); 5811 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
5811 return rflags; 5812 return rflags;
5812 } 5813 }
5813 EXPORT_SYMBOL_GPL(kvm_get_rflags); 5814 EXPORT_SYMBOL_GPL(kvm_get_rflags);
5814 5815
5815 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 5816 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5816 { 5817 {
5817 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 5818 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5818 vcpu->arch.singlestep_cs == 5819 vcpu->arch.singlestep_cs ==
5819 get_segment_selector(vcpu, VCPU_SREG_CS) && 5820 get_segment_selector(vcpu, VCPU_SREG_CS) &&
5820 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) 5821 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
5821 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; 5822 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
5822 kvm_x86_ops->set_rflags(vcpu, rflags); 5823 kvm_x86_ops->set_rflags(vcpu, rflags);
5823 } 5824 }
5824 EXPORT_SYMBOL_GPL(kvm_set_rflags); 5825 EXPORT_SYMBOL_GPL(kvm_set_rflags);
5825 5826
5826 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); 5827 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
5827 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); 5828 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
5828 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); 5829 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
5829 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); 5830 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
5830 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); 5831 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
5831 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); 5832 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
5832 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); 5833 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
5833 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); 5834 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5834 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); 5835 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5835 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 5836 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5836 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 5837 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
5837 5838
include/linux/kvm_host.h
1 #ifndef __KVM_HOST_H 1 #ifndef __KVM_HOST_H
2 #define __KVM_HOST_H 2 #define __KVM_HOST_H
3 3
4 /* 4 /*
5 * This work is licensed under the terms of the GNU GPL, version 2. See 5 * This work is licensed under the terms of the GNU GPL, version 2. See
6 * the COPYING file in the top-level directory. 6 * the COPYING file in the top-level directory.
7 */ 7 */
8 8
9 #include <linux/types.h> 9 #include <linux/types.h>
10 #include <linux/hardirq.h> 10 #include <linux/hardirq.h>
11 #include <linux/list.h> 11 #include <linux/list.h>
12 #include <linux/mutex.h> 12 #include <linux/mutex.h>
13 #include <linux/spinlock.h> 13 #include <linux/spinlock.h>
14 #include <linux/signal.h> 14 #include <linux/signal.h>
15 #include <linux/sched.h> 15 #include <linux/sched.h>
16 #include <linux/mm.h> 16 #include <linux/mm.h>
17 #include <linux/preempt.h> 17 #include <linux/preempt.h>
18 #include <linux/msi.h> 18 #include <linux/msi.h>
19 #include <asm/signal.h> 19 #include <asm/signal.h>
20 20
21 #include <linux/kvm.h> 21 #include <linux/kvm.h>
22 #include <linux/kvm_para.h> 22 #include <linux/kvm_para.h>
23 23
24 #include <linux/kvm_types.h> 24 #include <linux/kvm_types.h>
25 25
26 #include <asm/kvm_host.h> 26 #include <asm/kvm_host.h>
27 27
28 /* 28 /*
29 * vcpu->requests bit members 29 * vcpu->requests bit members
30 */ 30 */
31 #define KVM_REQ_TLB_FLUSH 0 31 #define KVM_REQ_TLB_FLUSH 0
32 #define KVM_REQ_MIGRATE_TIMER 1 32 #define KVM_REQ_MIGRATE_TIMER 1
33 #define KVM_REQ_REPORT_TPR_ACCESS 2 33 #define KVM_REQ_REPORT_TPR_ACCESS 2
34 #define KVM_REQ_MMU_RELOAD 3 34 #define KVM_REQ_MMU_RELOAD 3
35 #define KVM_REQ_TRIPLE_FAULT 4 35 #define KVM_REQ_TRIPLE_FAULT 4
36 #define KVM_REQ_PENDING_TIMER 5 36 #define KVM_REQ_PENDING_TIMER 5
37 #define KVM_REQ_UNHALT 6 37 #define KVM_REQ_UNHALT 6
38 #define KVM_REQ_MMU_SYNC 7 38 #define KVM_REQ_MMU_SYNC 7
39 #define KVM_REQ_KVMCLOCK_UPDATE 8 39 #define KVM_REQ_KVMCLOCK_UPDATE 8
40 #define KVM_REQ_KICK 9 40 #define KVM_REQ_KICK 9
41 #define KVM_REQ_DEACTIVATE_FPU 10 41 #define KVM_REQ_DEACTIVATE_FPU 10
42 42
43 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 43 #define KVM_USERSPACE_IRQ_SOURCE_ID 0
44 44
45 struct kvm; 45 struct kvm;
46 struct kvm_vcpu; 46 struct kvm_vcpu;
47 extern struct kmem_cache *kvm_vcpu_cache; 47 extern struct kmem_cache *kvm_vcpu_cache;
48 48
49 /* 49 /*
50 * It would be nice to use something smarter than a linear search, TBD... 50 * It would be nice to use something smarter than a linear search, TBD...
51 * Thankfully we dont expect many devices to register (famous last words :), 51 * Thankfully we dont expect many devices to register (famous last words :),
52 * so until then it will suffice. At least its abstracted so we can change 52 * so until then it will suffice. At least its abstracted so we can change
53 * in one place. 53 * in one place.
54 */ 54 */
55 struct kvm_io_bus { 55 struct kvm_io_bus {
56 int dev_count; 56 int dev_count;
57 #define NR_IOBUS_DEVS 6 57 #define NR_IOBUS_DEVS 6
58 struct kvm_io_device *devs[NR_IOBUS_DEVS]; 58 struct kvm_io_device *devs[NR_IOBUS_DEVS];
59 }; 59 };
60 60
61 enum kvm_bus { 61 enum kvm_bus {
62 KVM_MMIO_BUS, 62 KVM_MMIO_BUS,
63 KVM_PIO_BUS, 63 KVM_PIO_BUS,
64 KVM_NR_BUSES 64 KVM_NR_BUSES
65 }; 65 };
66 66
67 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 67 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
68 int len, const void *val); 68 int len, const void *val);
69 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, 69 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
70 void *val); 70 void *val);
71 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, 71 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
72 struct kvm_io_device *dev); 72 struct kvm_io_device *dev);
73 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 73 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
74 struct kvm_io_device *dev); 74 struct kvm_io_device *dev);
75 75
76 struct kvm_vcpu { 76 struct kvm_vcpu {
77 struct kvm *kvm; 77 struct kvm *kvm;
78 #ifdef CONFIG_PREEMPT_NOTIFIERS 78 #ifdef CONFIG_PREEMPT_NOTIFIERS
79 struct preempt_notifier preempt_notifier; 79 struct preempt_notifier preempt_notifier;
80 #endif 80 #endif
81 int vcpu_id; 81 int vcpu_id;
82 struct mutex mutex; 82 struct mutex mutex;
83 int cpu; 83 int cpu;
84 struct kvm_run *run; 84 struct kvm_run *run;
85 unsigned long requests; 85 unsigned long requests;
86 unsigned long guest_debug; 86 unsigned long guest_debug;
87 int srcu_idx; 87 int srcu_idx;
88 88
89 int fpu_active; 89 int fpu_active;
90 int guest_fpu_loaded; 90 int guest_fpu_loaded;
91 wait_queue_head_t wq; 91 wait_queue_head_t wq;
92 int sigset_active; 92 int sigset_active;
93 sigset_t sigset; 93 sigset_t sigset;
94 struct kvm_vcpu_stat stat; 94 struct kvm_vcpu_stat stat;
95 95
96 #ifdef CONFIG_HAS_IOMEM 96 #ifdef CONFIG_HAS_IOMEM
97 int mmio_needed; 97 int mmio_needed;
98 int mmio_read_completed; 98 int mmio_read_completed;
99 int mmio_is_write; 99 int mmio_is_write;
100 int mmio_size; 100 int mmio_size;
101 unsigned char mmio_data[8]; 101 unsigned char mmio_data[8];
102 gpa_t mmio_phys_addr; 102 gpa_t mmio_phys_addr;
103 #endif 103 #endif
104 104
105 struct kvm_vcpu_arch arch; 105 struct kvm_vcpu_arch arch;
106 }; 106 };
107 107
108 struct kvm_memory_slot { 108 struct kvm_memory_slot {
109 gfn_t base_gfn; 109 gfn_t base_gfn;
110 unsigned long npages; 110 unsigned long npages;
111 unsigned long flags; 111 unsigned long flags;
112 unsigned long *rmap; 112 unsigned long *rmap;
113 unsigned long *dirty_bitmap; 113 unsigned long *dirty_bitmap;
114 struct { 114 struct {
115 unsigned long rmap_pde; 115 unsigned long rmap_pde;
116 int write_count; 116 int write_count;
117 } *lpage_info[KVM_NR_PAGE_SIZES - 1]; 117 } *lpage_info[KVM_NR_PAGE_SIZES - 1];
118 unsigned long userspace_addr; 118 unsigned long userspace_addr;
119 int user_alloc; 119 int user_alloc;
120 }; 120 };
121 121
122 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
123 {
124 return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
125 }
126
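The wrapper added above returns a memslot's dirty bitmap size in bytes as an unsigned long, rounding the page count up to a multiple of BITS_PER_LONG before converting bits to bytes. A minimal usage sketch follows; dirty_bitmap_alloc() is a hypothetical caller written for illustration only and is not part of this diff:

	/* Hypothetical sketch: size the allocation with the wrapper so the byte
	 * count stays an unsigned long end to end. vmalloc() and memset() are
	 * ordinary kernel APIs; this helper does not appear in the shown patch. */
	static int dirty_bitmap_alloc(struct kvm_memory_slot *memslot)
	{
		unsigned long bytes = kvm_dirty_bitmap_bytes(memslot);

		memslot->dirty_bitmap = vmalloc(bytes);
		if (!memslot->dirty_bitmap)
			return -ENOMEM;
		memset(memslot->dirty_bitmap, 0, bytes);
		return 0;
	}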
122 struct kvm_kernel_irq_routing_entry { 127 struct kvm_kernel_irq_routing_entry {
123 u32 gsi; 128 u32 gsi;
124 u32 type; 129 u32 type;
125 int (*set)(struct kvm_kernel_irq_routing_entry *e, 130 int (*set)(struct kvm_kernel_irq_routing_entry *e,
126 struct kvm *kvm, int irq_source_id, int level); 131 struct kvm *kvm, int irq_source_id, int level);
127 union { 132 union {
128 struct { 133 struct {
129 unsigned irqchip; 134 unsigned irqchip;
130 unsigned pin; 135 unsigned pin;
131 } irqchip; 136 } irqchip;
132 struct msi_msg msi; 137 struct msi_msg msi;
133 }; 138 };
134 struct hlist_node link; 139 struct hlist_node link;
135 }; 140 };
136 141
137 #ifdef __KVM_HAVE_IOAPIC 142 #ifdef __KVM_HAVE_IOAPIC
138 143
139 struct kvm_irq_routing_table { 144 struct kvm_irq_routing_table {
140 int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; 145 int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
141 struct kvm_kernel_irq_routing_entry *rt_entries; 146 struct kvm_kernel_irq_routing_entry *rt_entries;
142 u32 nr_rt_entries; 147 u32 nr_rt_entries;
143 /* 148 /*
144 * Array indexed by gsi. Each entry contains list of irq chips 149 * Array indexed by gsi. Each entry contains list of irq chips
145 * the gsi is connected to. 150 * the gsi is connected to.
146 */ 151 */
147 struct hlist_head map[0]; 152 struct hlist_head map[0];
148 }; 153 };
149 154
150 #else 155 #else
151 156
152 struct kvm_irq_routing_table {}; 157 struct kvm_irq_routing_table {};
153 158
154 #endif 159 #endif
155 160
156 struct kvm_memslots { 161 struct kvm_memslots {
157 int nmemslots; 162 int nmemslots;
158 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + 163 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
159 KVM_PRIVATE_MEM_SLOTS]; 164 KVM_PRIVATE_MEM_SLOTS];
160 }; 165 };
161 166
162 struct kvm { 167 struct kvm {
163 spinlock_t mmu_lock; 168 spinlock_t mmu_lock;
164 raw_spinlock_t requests_lock; 169 raw_spinlock_t requests_lock;
165 struct mutex slots_lock; 170 struct mutex slots_lock;
166 struct mm_struct *mm; /* userspace tied to this vm */ 171 struct mm_struct *mm; /* userspace tied to this vm */
167 struct kvm_memslots *memslots; 172 struct kvm_memslots *memslots;
168 struct srcu_struct srcu; 173 struct srcu_struct srcu;
169 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 174 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
170 u32 bsp_vcpu_id; 175 u32 bsp_vcpu_id;
171 struct kvm_vcpu *bsp_vcpu; 176 struct kvm_vcpu *bsp_vcpu;
172 #endif 177 #endif
173 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; 178 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
174 atomic_t online_vcpus; 179 atomic_t online_vcpus;
175 struct list_head vm_list; 180 struct list_head vm_list;
176 struct mutex lock; 181 struct mutex lock;
177 struct kvm_io_bus *buses[KVM_NR_BUSES]; 182 struct kvm_io_bus *buses[KVM_NR_BUSES];
178 #ifdef CONFIG_HAVE_KVM_EVENTFD 183 #ifdef CONFIG_HAVE_KVM_EVENTFD
179 struct { 184 struct {
180 spinlock_t lock; 185 spinlock_t lock;
181 struct list_head items; 186 struct list_head items;
182 } irqfds; 187 } irqfds;
183 struct list_head ioeventfds; 188 struct list_head ioeventfds;
184 #endif 189 #endif
185 struct kvm_vm_stat stat; 190 struct kvm_vm_stat stat;
186 struct kvm_arch arch; 191 struct kvm_arch arch;
187 atomic_t users_count; 192 atomic_t users_count;
188 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 193 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
189 struct kvm_coalesced_mmio_dev *coalesced_mmio_dev; 194 struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
190 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; 195 struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
191 #endif 196 #endif
192 197
193 struct mutex irq_lock; 198 struct mutex irq_lock;
194 #ifdef CONFIG_HAVE_KVM_IRQCHIP 199 #ifdef CONFIG_HAVE_KVM_IRQCHIP
195 struct kvm_irq_routing_table *irq_routing; 200 struct kvm_irq_routing_table *irq_routing;
196 struct hlist_head mask_notifier_list; 201 struct hlist_head mask_notifier_list;
197 struct hlist_head irq_ack_notifier_list; 202 struct hlist_head irq_ack_notifier_list;
198 #endif 203 #endif
199 204
200 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER 205 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
201 struct mmu_notifier mmu_notifier; 206 struct mmu_notifier mmu_notifier;
202 unsigned long mmu_notifier_seq; 207 unsigned long mmu_notifier_seq;
203 long mmu_notifier_count; 208 long mmu_notifier_count;
204 #endif 209 #endif
205 }; 210 };
206 211
207 /* The guest did something we don't support. */ 212 /* The guest did something we don't support. */
208 #define pr_unimpl(vcpu, fmt, ...) \ 213 #define pr_unimpl(vcpu, fmt, ...) \
209 do { \ 214 do { \
210 if (printk_ratelimit()) \ 215 if (printk_ratelimit()) \
211 printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ 216 printk(KERN_ERR "kvm: %i: cpu%i " fmt, \
212 current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ 217 current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
213 } while (0) 218 } while (0)
214 219
215 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) 220 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
216 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) 221 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
217 222
218 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) 223 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
219 { 224 {
220 smp_rmb(); 225 smp_rmb();
221 return kvm->vcpus[i]; 226 return kvm->vcpus[i];
222 } 227 }
223 228
224 #define kvm_for_each_vcpu(idx, vcpup, kvm) \ 229 #define kvm_for_each_vcpu(idx, vcpup, kvm) \
225 for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \ 230 for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
226 idx < atomic_read(&kvm->online_vcpus) && vcpup; \ 231 idx < atomic_read(&kvm->online_vcpus) && vcpup; \
227 vcpup = kvm_get_vcpu(kvm, ++idx)) 232 vcpup = kvm_get_vcpu(kvm, ++idx))
228 233
229 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); 234 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
230 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); 235 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
231 236
232 void vcpu_load(struct kvm_vcpu *vcpu); 237 void vcpu_load(struct kvm_vcpu *vcpu);
233 void vcpu_put(struct kvm_vcpu *vcpu); 238 void vcpu_put(struct kvm_vcpu *vcpu);
234 239
235 int kvm_init(void *opaque, unsigned int vcpu_size, 240 int kvm_init(void *opaque, unsigned int vcpu_size,
236 struct module *module); 241 struct module *module);
237 void kvm_exit(void); 242 void kvm_exit(void);
238 243
239 void kvm_get_kvm(struct kvm *kvm); 244 void kvm_get_kvm(struct kvm *kvm);
240 void kvm_put_kvm(struct kvm *kvm); 245 void kvm_put_kvm(struct kvm *kvm);
241 246
242 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) 247 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
243 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) 248 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
244 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } 249 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
245 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); 250 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
246 251
247 extern struct page *bad_page; 252 extern struct page *bad_page;
248 extern pfn_t bad_pfn; 253 extern pfn_t bad_pfn;
249 254
250 int is_error_page(struct page *page); 255 int is_error_page(struct page *page);
251 int is_error_pfn(pfn_t pfn); 256 int is_error_pfn(pfn_t pfn);
252 int kvm_is_error_hva(unsigned long addr); 257 int kvm_is_error_hva(unsigned long addr);
253 int kvm_set_memory_region(struct kvm *kvm, 258 int kvm_set_memory_region(struct kvm *kvm,
254 struct kvm_userspace_memory_region *mem, 259 struct kvm_userspace_memory_region *mem,
255 int user_alloc); 260 int user_alloc);
256 int __kvm_set_memory_region(struct kvm *kvm, 261 int __kvm_set_memory_region(struct kvm *kvm,
257 struct kvm_userspace_memory_region *mem, 262 struct kvm_userspace_memory_region *mem,
258 int user_alloc); 263 int user_alloc);
259 int kvm_arch_prepare_memory_region(struct kvm *kvm, 264 int kvm_arch_prepare_memory_region(struct kvm *kvm,
260 struct kvm_memory_slot *memslot, 265 struct kvm_memory_slot *memslot,
261 struct kvm_memory_slot old, 266 struct kvm_memory_slot old,
262 struct kvm_userspace_memory_region *mem, 267 struct kvm_userspace_memory_region *mem,
263 int user_alloc); 268 int user_alloc);
264 void kvm_arch_commit_memory_region(struct kvm *kvm, 269 void kvm_arch_commit_memory_region(struct kvm *kvm,
265 struct kvm_userspace_memory_region *mem, 270 struct kvm_userspace_memory_region *mem,
266 struct kvm_memory_slot old, 271 struct kvm_memory_slot old,
267 int user_alloc); 272 int user_alloc);
268 void kvm_disable_largepages(void); 273 void kvm_disable_largepages(void);
269 void kvm_arch_flush_shadow(struct kvm *kvm); 274 void kvm_arch_flush_shadow(struct kvm *kvm);
270 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); 275 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
271 gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn); 276 gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
272 277
273 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 278 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
274 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); 279 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
275 void kvm_release_page_clean(struct page *page); 280 void kvm_release_page_clean(struct page *page);
276 void kvm_release_page_dirty(struct page *page); 281 void kvm_release_page_dirty(struct page *page);
277 void kvm_set_page_dirty(struct page *page); 282 void kvm_set_page_dirty(struct page *page);
278 void kvm_set_page_accessed(struct page *page); 283 void kvm_set_page_accessed(struct page *page);
279 284
280 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); 285 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
281 pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 286 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
282 struct kvm_memory_slot *slot, gfn_t gfn); 287 struct kvm_memory_slot *slot, gfn_t gfn);
283 int memslot_id(struct kvm *kvm, gfn_t gfn); 288 int memslot_id(struct kvm *kvm, gfn_t gfn);
284 void kvm_release_pfn_dirty(pfn_t); 289 void kvm_release_pfn_dirty(pfn_t);
285 void kvm_release_pfn_clean(pfn_t pfn); 290 void kvm_release_pfn_clean(pfn_t pfn);
286 void kvm_set_pfn_dirty(pfn_t pfn); 291 void kvm_set_pfn_dirty(pfn_t pfn);
287 void kvm_set_pfn_accessed(pfn_t pfn); 292 void kvm_set_pfn_accessed(pfn_t pfn);
288 void kvm_get_pfn(pfn_t pfn); 293 void kvm_get_pfn(pfn_t pfn);
289 294
290 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 295 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
291 int len); 296 int len);
292 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 297 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
293 unsigned long len); 298 unsigned long len);
294 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); 299 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
295 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, 300 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
296 int offset, int len); 301 int offset, int len);
297 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 302 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
298 unsigned long len); 303 unsigned long len);
299 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); 304 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
300 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); 305 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
301 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 306 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
302 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); 307 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
303 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); 308 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
304 void mark_page_dirty(struct kvm *kvm, gfn_t gfn); 309 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
305 310
306 void kvm_vcpu_block(struct kvm_vcpu *vcpu); 311 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
307 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); 312 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
308 void kvm_resched(struct kvm_vcpu *vcpu); 313 void kvm_resched(struct kvm_vcpu *vcpu);
309 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); 314 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
310 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); 315 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
311 void kvm_flush_remote_tlbs(struct kvm *kvm); 316 void kvm_flush_remote_tlbs(struct kvm *kvm);
312 void kvm_reload_remote_mmus(struct kvm *kvm); 317 void kvm_reload_remote_mmus(struct kvm *kvm);
313 318
314 long kvm_arch_dev_ioctl(struct file *filp, 319 long kvm_arch_dev_ioctl(struct file *filp,
315 unsigned int ioctl, unsigned long arg); 320 unsigned int ioctl, unsigned long arg);
316 long kvm_arch_vcpu_ioctl(struct file *filp, 321 long kvm_arch_vcpu_ioctl(struct file *filp,
317 unsigned int ioctl, unsigned long arg); 322 unsigned int ioctl, unsigned long arg);
318 323
319 int kvm_dev_ioctl_check_extension(long ext); 324 int kvm_dev_ioctl_check_extension(long ext);
320 325
321 int kvm_get_dirty_log(struct kvm *kvm, 326 int kvm_get_dirty_log(struct kvm *kvm,
322 struct kvm_dirty_log *log, int *is_dirty); 327 struct kvm_dirty_log *log, int *is_dirty);
323 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 328 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
324 struct kvm_dirty_log *log); 329 struct kvm_dirty_log *log);
325 330
326 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, 331 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
327 struct 332 struct
328 kvm_userspace_memory_region *mem, 333 kvm_userspace_memory_region *mem,
329 int user_alloc); 334 int user_alloc);
330 long kvm_arch_vm_ioctl(struct file *filp, 335 long kvm_arch_vm_ioctl(struct file *filp,
331 unsigned int ioctl, unsigned long arg); 336 unsigned int ioctl, unsigned long arg);
332 337
333 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); 338 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
334 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); 339 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
335 340
336 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 341 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
337 struct kvm_translation *tr); 342 struct kvm_translation *tr);
338 343
339 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); 344 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
340 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); 345 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
341 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 346 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
342 struct kvm_sregs *sregs); 347 struct kvm_sregs *sregs);
343 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 348 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
344 struct kvm_sregs *sregs); 349 struct kvm_sregs *sregs);
345 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 350 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
346 struct kvm_mp_state *mp_state); 351 struct kvm_mp_state *mp_state);
347 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 352 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
348 struct kvm_mp_state *mp_state); 353 struct kvm_mp_state *mp_state);
349 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 354 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
350 struct kvm_guest_debug *dbg); 355 struct kvm_guest_debug *dbg);
351 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); 356 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
352 357
353 int kvm_arch_init(void *opaque); 358 int kvm_arch_init(void *opaque);
354 void kvm_arch_exit(void); 359 void kvm_arch_exit(void);
355 360
356 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); 361 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
357 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); 362 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
358 363
359 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); 364 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
360 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 365 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
361 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); 366 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
362 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); 367 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
363 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); 368 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
364 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); 369 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
365 370
366 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); 371 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
367 int kvm_arch_hardware_enable(void *garbage); 372 int kvm_arch_hardware_enable(void *garbage);
368 void kvm_arch_hardware_disable(void *garbage); 373 void kvm_arch_hardware_disable(void *garbage);
369 int kvm_arch_hardware_setup(void); 374 int kvm_arch_hardware_setup(void);
370 void kvm_arch_hardware_unsetup(void); 375 void kvm_arch_hardware_unsetup(void);
371 void kvm_arch_check_processor_compat(void *rtn); 376 void kvm_arch_check_processor_compat(void *rtn);
372 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); 377 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
373 378
374 void kvm_free_physmem(struct kvm *kvm); 379 void kvm_free_physmem(struct kvm *kvm);
375 380
376 struct kvm *kvm_arch_create_vm(void); 381 struct kvm *kvm_arch_create_vm(void);
377 void kvm_arch_destroy_vm(struct kvm *kvm); 382 void kvm_arch_destroy_vm(struct kvm *kvm);
378 void kvm_free_all_assigned_devices(struct kvm *kvm); 383 void kvm_free_all_assigned_devices(struct kvm *kvm);
379 void kvm_arch_sync_events(struct kvm *kvm); 384 void kvm_arch_sync_events(struct kvm *kvm);
380 385
381 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); 386 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
382 void kvm_vcpu_kick(struct kvm_vcpu *vcpu); 387 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
383 388
384 int kvm_is_mmio_pfn(pfn_t pfn); 389 int kvm_is_mmio_pfn(pfn_t pfn);
385 390
386 struct kvm_irq_ack_notifier { 391 struct kvm_irq_ack_notifier {
387 struct hlist_node link; 392 struct hlist_node link;
388 unsigned gsi; 393 unsigned gsi;
389 void (*irq_acked)(struct kvm_irq_ack_notifier *kian); 394 void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
390 }; 395 };
391 396
392 #define KVM_ASSIGNED_MSIX_PENDING 0x1 397 #define KVM_ASSIGNED_MSIX_PENDING 0x1
393 struct kvm_guest_msix_entry { 398 struct kvm_guest_msix_entry {
394 u32 vector; 399 u32 vector;
395 u16 entry; 400 u16 entry;
396 u16 flags; 401 u16 flags;
397 }; 402 };
398 403
399 struct kvm_assigned_dev_kernel { 404 struct kvm_assigned_dev_kernel {
400 struct kvm_irq_ack_notifier ack_notifier; 405 struct kvm_irq_ack_notifier ack_notifier;
401 struct work_struct interrupt_work; 406 struct work_struct interrupt_work;
402 struct list_head list; 407 struct list_head list;
403 int assigned_dev_id; 408 int assigned_dev_id;
404 int host_segnr; 409 int host_segnr;
405 int host_busnr; 410 int host_busnr;
406 int host_devfn; 411 int host_devfn;
407 unsigned int entries_nr; 412 unsigned int entries_nr;
408 int host_irq; 413 int host_irq;
409 bool host_irq_disabled; 414 bool host_irq_disabled;
410 struct msix_entry *host_msix_entries; 415 struct msix_entry *host_msix_entries;
411 int guest_irq; 416 int guest_irq;
412 struct kvm_guest_msix_entry *guest_msix_entries; 417 struct kvm_guest_msix_entry *guest_msix_entries;
413 unsigned long irq_requested_type; 418 unsigned long irq_requested_type;
414 int irq_source_id; 419 int irq_source_id;
415 int flags; 420 int flags;
416 struct pci_dev *dev; 421 struct pci_dev *dev;
417 struct kvm *kvm; 422 struct kvm *kvm;
418 spinlock_t assigned_dev_lock; 423 spinlock_t assigned_dev_lock;
419 }; 424 };
420 425
421 struct kvm_irq_mask_notifier { 426 struct kvm_irq_mask_notifier {
422 void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); 427 void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
423 int irq; 428 int irq;
424 struct hlist_node link; 429 struct hlist_node link;
425 }; 430 };
426 431
427 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, 432 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
428 struct kvm_irq_mask_notifier *kimn); 433 struct kvm_irq_mask_notifier *kimn);
429 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, 434 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
430 struct kvm_irq_mask_notifier *kimn); 435 struct kvm_irq_mask_notifier *kimn);
431 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); 436 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
432 437
433 #ifdef __KVM_HAVE_IOAPIC 438 #ifdef __KVM_HAVE_IOAPIC
434 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, 439 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
435 union kvm_ioapic_redirect_entry *entry, 440 union kvm_ioapic_redirect_entry *entry,
436 unsigned long *deliver_bitmask); 441 unsigned long *deliver_bitmask);
437 #endif 442 #endif
438 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); 443 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
439 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); 444 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
440 void kvm_register_irq_ack_notifier(struct kvm *kvm, 445 void kvm_register_irq_ack_notifier(struct kvm *kvm,
441 struct kvm_irq_ack_notifier *kian); 446 struct kvm_irq_ack_notifier *kian);
442 void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 447 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
443 struct kvm_irq_ack_notifier *kian); 448 struct kvm_irq_ack_notifier *kian);
444 int kvm_request_irq_source_id(struct kvm *kvm); 449 int kvm_request_irq_source_id(struct kvm *kvm);
445 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); 450 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
446 451
447 /* For vcpu->arch.iommu_flags */ 452 /* For vcpu->arch.iommu_flags */
448 #define KVM_IOMMU_CACHE_COHERENCY 0x1 453 #define KVM_IOMMU_CACHE_COHERENCY 0x1
449 454
450 #ifdef CONFIG_IOMMU_API 455 #ifdef CONFIG_IOMMU_API
451 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); 456 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
452 int kvm_iommu_map_guest(struct kvm *kvm); 457 int kvm_iommu_map_guest(struct kvm *kvm);
453 int kvm_iommu_unmap_guest(struct kvm *kvm); 458 int kvm_iommu_unmap_guest(struct kvm *kvm);
454 int kvm_assign_device(struct kvm *kvm, 459 int kvm_assign_device(struct kvm *kvm,
455 struct kvm_assigned_dev_kernel *assigned_dev); 460 struct kvm_assigned_dev_kernel *assigned_dev);
456 int kvm_deassign_device(struct kvm *kvm, 461 int kvm_deassign_device(struct kvm *kvm,
457 struct kvm_assigned_dev_kernel *assigned_dev); 462 struct kvm_assigned_dev_kernel *assigned_dev);
458 #else /* CONFIG_IOMMU_API */ 463 #else /* CONFIG_IOMMU_API */
459 static inline int kvm_iommu_map_pages(struct kvm *kvm, 464 static inline int kvm_iommu_map_pages(struct kvm *kvm,
460 gfn_t base_gfn, 465 gfn_t base_gfn,
461 unsigned long npages) 466 unsigned long npages)
462 { 467 {
463 return 0; 468 return 0;
464 } 469 }
465 470
466 static inline int kvm_iommu_map_guest(struct kvm *kvm) 471 static inline int kvm_iommu_map_guest(struct kvm *kvm)
467 { 472 {
468 return -ENODEV; 473 return -ENODEV;
469 } 474 }
470 475
471 static inline int kvm_iommu_unmap_guest(struct kvm *kvm) 476 static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
472 { 477 {
473 return 0; 478 return 0;
474 } 479 }
475 480
476 static inline int kvm_assign_device(struct kvm *kvm, 481 static inline int kvm_assign_device(struct kvm *kvm,
477 struct kvm_assigned_dev_kernel *assigned_dev) 482 struct kvm_assigned_dev_kernel *assigned_dev)
478 { 483 {
479 return 0; 484 return 0;
480 } 485 }
481 486
482 static inline int kvm_deassign_device(struct kvm *kvm, 487 static inline int kvm_deassign_device(struct kvm *kvm,
483 struct kvm_assigned_dev_kernel *assigned_dev) 488 struct kvm_assigned_dev_kernel *assigned_dev)
484 { 489 {
485 return 0; 490 return 0;
486 } 491 }
487 #endif /* CONFIG_IOMMU_API */ 492 #endif /* CONFIG_IOMMU_API */
488 493
489 static inline void kvm_guest_enter(void) 494 static inline void kvm_guest_enter(void)
490 { 495 {
491 account_system_vtime(current); 496 account_system_vtime(current);
492 current->flags |= PF_VCPU; 497 current->flags |= PF_VCPU;
493 } 498 }
494 499
495 static inline void kvm_guest_exit(void) 500 static inline void kvm_guest_exit(void)
496 { 501 {
497 account_system_vtime(current); 502 account_system_vtime(current);
498 current->flags &= ~PF_VCPU; 503 current->flags &= ~PF_VCPU;
499 } 504 }
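/*
 * Illustrative pairing only (not part of this patch; the function name
 * below is hypothetical): an arch vcpu-run path is expected to bracket
 * the time spent in guest mode with kvm_guest_enter()/kvm_guest_exit()
 * so that vtime accounting and the PF_VCPU flag stay consistent.
 */
static inline void example_run_guest_once(void)
{
	kvm_guest_enter();
	/* ... low-level hardware entry into the guest would happen here ... */
	kvm_guest_exit();
}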
500 505
501 static inline gpa_t gfn_to_gpa(gfn_t gfn) 506 static inline gpa_t gfn_to_gpa(gfn_t gfn)
502 { 507 {
503 return (gpa_t)gfn << PAGE_SHIFT; 508 return (gpa_t)gfn << PAGE_SHIFT;
504 } 509 }
505 510
506 static inline hpa_t pfn_to_hpa(pfn_t pfn) 511 static inline hpa_t pfn_to_hpa(pfn_t pfn)
507 { 512 {
508 return (hpa_t)pfn << PAGE_SHIFT; 513 return (hpa_t)pfn << PAGE_SHIFT;
509 } 514 }
510 515
511 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) 516 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
512 { 517 {
513 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); 518 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
514 } 519 }
515 520
516 enum kvm_stat_kind { 521 enum kvm_stat_kind {
517 KVM_STAT_VM, 522 KVM_STAT_VM,
518 KVM_STAT_VCPU, 523 KVM_STAT_VCPU,
519 }; 524 };
520 525
521 struct kvm_stats_debugfs_item { 526 struct kvm_stats_debugfs_item {
522 const char *name; 527 const char *name;
523 int offset; 528 int offset;
524 enum kvm_stat_kind kind; 529 enum kvm_stat_kind kind;
525 struct dentry *dentry; 530 struct dentry *dentry;
526 }; 531 };
527 extern struct kvm_stats_debugfs_item debugfs_entries[]; 532 extern struct kvm_stats_debugfs_item debugfs_entries[];
528 extern struct dentry *kvm_debugfs_dir; 533 extern struct dentry *kvm_debugfs_dir;
529 534
530 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER 535 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
531 static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) 536 static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
532 { 537 {
533 if (unlikely(vcpu->kvm->mmu_notifier_count)) 538 if (unlikely(vcpu->kvm->mmu_notifier_count))
534 return 1; 539 return 1;
535 /* 540 /*
536 * Both reads happen under the mmu_lock and both values are 541 * Both reads happen under the mmu_lock and both values are
537 	 * modified under mmu_lock, so there's no need for smp_rmb() 542 	 * modified under mmu_lock, so there's no need for smp_rmb()
538 * here in between, otherwise mmu_notifier_count should be 543 * here in between, otherwise mmu_notifier_count should be
539 * read before mmu_notifier_seq, see 544 * read before mmu_notifier_seq, see
540 * mmu_notifier_invalidate_range_end write side. 545 * mmu_notifier_invalidate_range_end write side.
541 */ 546 */
542 if (vcpu->kvm->mmu_notifier_seq != mmu_seq) 547 if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
543 return 1; 548 return 1;
544 return 0; 549 return 0;
545 } 550 }
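/*
 * Sketch of the intended use of mmu_notifier_retry() in an arch page
 * fault path (illustrative only, not part of this patch; the helper
 * name example_map_fault and the elided spte installation are
 * hypothetical): sample mmu_notifier_seq before looking up the pfn
 * outside mmu_lock, then re-check under mmu_lock and bail out if a
 * notifier ran in between.
 */
static inline int example_map_fault(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	unsigned long mmu_seq = vcpu->kvm->mmu_notifier_seq;
	pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq)) {
		spin_unlock(&vcpu->kvm->mmu_lock);
		kvm_release_pfn_clean(pfn);
		return 0;	/* caller should retry the fault */
	}
	/* ... install the spte for gfn -> pfn here ... */
	spin_unlock(&vcpu->kvm->mmu_lock);
	return 1;
}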
546 #endif 551 #endif
547 552
548 #ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION 553 #ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION
549 #define unalias_gfn_instantiation unalias_gfn 554 #define unalias_gfn_instantiation unalias_gfn
550 #endif 555 #endif
551 556
552 #ifdef CONFIG_HAVE_KVM_IRQCHIP 557 #ifdef CONFIG_HAVE_KVM_IRQCHIP
553 558
554 #define KVM_MAX_IRQ_ROUTES 1024 559 #define KVM_MAX_IRQ_ROUTES 1024
555 560
556 int kvm_setup_default_irq_routing(struct kvm *kvm); 561 int kvm_setup_default_irq_routing(struct kvm *kvm);
557 int kvm_set_irq_routing(struct kvm *kvm, 562 int kvm_set_irq_routing(struct kvm *kvm,
558 const struct kvm_irq_routing_entry *entries, 563 const struct kvm_irq_routing_entry *entries,
559 unsigned nr, 564 unsigned nr,
560 unsigned flags); 565 unsigned flags);
561 void kvm_free_irq_routing(struct kvm *kvm); 566 void kvm_free_irq_routing(struct kvm *kvm);
562 567
563 #else 568 #else
564 569
565 static inline void kvm_free_irq_routing(struct kvm *kvm) {} 570 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
566 571
567 #endif 572 #endif
568 573
569 #ifdef CONFIG_HAVE_KVM_EVENTFD 574 #ifdef CONFIG_HAVE_KVM_EVENTFD
570 575
571 void kvm_eventfd_init(struct kvm *kvm); 576 void kvm_eventfd_init(struct kvm *kvm);
572 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); 577 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
573 void kvm_irqfd_release(struct kvm *kvm); 578 void kvm_irqfd_release(struct kvm *kvm);
574 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); 579 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
575 580
576 #else 581 #else
577 582
578 static inline void kvm_eventfd_init(struct kvm *kvm) {} 583 static inline void kvm_eventfd_init(struct kvm *kvm) {}
579 static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) 584 static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
580 { 585 {
581 return -EINVAL; 586 return -EINVAL;
582 } 587 }
583 588
584 static inline void kvm_irqfd_release(struct kvm *kvm) {} 589 static inline void kvm_irqfd_release(struct kvm *kvm) {}
585 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 590 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
586 { 591 {
587 return -ENOSYS; 592 return -ENOSYS;
588 } 593 }
589 594
590 #endif /* CONFIG_HAVE_KVM_EVENTFD */ 595 #endif /* CONFIG_HAVE_KVM_EVENTFD */
591 596
592 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 597 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
593 static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) 598 static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
594 { 599 {
595 return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; 600 return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
596 } 601 }
597 #endif 602 #endif
598 603
599 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT 604 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
600 605
601 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, 606 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
602 unsigned long arg); 607 unsigned long arg);
603 608
604 #else 609 #else
605 610
606 static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, 611 static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
607 unsigned long arg) 612 unsigned long arg)
608 { 613 {
609 return -ENOTTY; 614 return -ENOTTY;
610 } 615 }
611 616
612 #endif 617 #endif
613 618
614 #endif 619 #endif
615 620
616 621
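virt/kvm/kvm_main.c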
1 /* 1 /*
2 * Kernel-based Virtual Machine driver for Linux 2 * Kernel-based Virtual Machine driver for Linux
3 * 3 *
4 * This module enables machines with Intel VT-x extensions to run virtual 4 * This module enables machines with Intel VT-x extensions to run virtual
5 * machines without emulation or binary translation. 5 * machines without emulation or binary translation.
6 * 6 *
7 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright (C) 2006 Qumranet, Inc.
8 * 8 *
9 * Authors: 9 * Authors:
10 * Avi Kivity <avi@qumranet.com> 10 * Avi Kivity <avi@qumranet.com>
11 * Yaniv Kamay <yaniv@qumranet.com> 11 * Yaniv Kamay <yaniv@qumranet.com>
12 * 12 *
13 * This work is licensed under the terms of the GNU GPL, version 2. See 13 * This work is licensed under the terms of the GNU GPL, version 2. See
14 * the COPYING file in the top-level directory. 14 * the COPYING file in the top-level directory.
15 * 15 *
16 */ 16 */
17 17
18 #include "iodev.h" 18 #include "iodev.h"
19 19
20 #include <linux/kvm_host.h> 20 #include <linux/kvm_host.h>
21 #include <linux/kvm.h> 21 #include <linux/kvm.h>
22 #include <linux/module.h> 22 #include <linux/module.h>
23 #include <linux/errno.h> 23 #include <linux/errno.h>
24 #include <linux/percpu.h> 24 #include <linux/percpu.h>
25 #include <linux/mm.h> 25 #include <linux/mm.h>
26 #include <linux/miscdevice.h> 26 #include <linux/miscdevice.h>
27 #include <linux/vmalloc.h> 27 #include <linux/vmalloc.h>
28 #include <linux/reboot.h> 28 #include <linux/reboot.h>
29 #include <linux/debugfs.h> 29 #include <linux/debugfs.h>
30 #include <linux/highmem.h> 30 #include <linux/highmem.h>
31 #include <linux/file.h> 31 #include <linux/file.h>
32 #include <linux/sysdev.h> 32 #include <linux/sysdev.h>
33 #include <linux/cpu.h> 33 #include <linux/cpu.h>
34 #include <linux/sched.h> 34 #include <linux/sched.h>
35 #include <linux/cpumask.h> 35 #include <linux/cpumask.h>
36 #include <linux/smp.h> 36 #include <linux/smp.h>
37 #include <linux/anon_inodes.h> 37 #include <linux/anon_inodes.h>
38 #include <linux/profile.h> 38 #include <linux/profile.h>
39 #include <linux/kvm_para.h> 39 #include <linux/kvm_para.h>
40 #include <linux/pagemap.h> 40 #include <linux/pagemap.h>
41 #include <linux/mman.h> 41 #include <linux/mman.h>
42 #include <linux/swap.h> 42 #include <linux/swap.h>
43 #include <linux/bitops.h> 43 #include <linux/bitops.h>
44 #include <linux/spinlock.h> 44 #include <linux/spinlock.h>
45 #include <linux/compat.h> 45 #include <linux/compat.h>
46 #include <linux/srcu.h> 46 #include <linux/srcu.h>
47 #include <linux/hugetlb.h> 47 #include <linux/hugetlb.h>
48 #include <linux/slab.h> 48 #include <linux/slab.h>
49 49
50 #include <asm/processor.h> 50 #include <asm/processor.h>
51 #include <asm/io.h> 51 #include <asm/io.h>
52 #include <asm/uaccess.h> 52 #include <asm/uaccess.h>
53 #include <asm/pgtable.h> 53 #include <asm/pgtable.h>
54 #include <asm-generic/bitops/le.h> 54 #include <asm-generic/bitops/le.h>
55 55
56 #include "coalesced_mmio.h" 56 #include "coalesced_mmio.h"
57 57
58 #define CREATE_TRACE_POINTS 58 #define CREATE_TRACE_POINTS
59 #include <trace/events/kvm.h> 59 #include <trace/events/kvm.h>
60 60
61 MODULE_AUTHOR("Qumranet"); 61 MODULE_AUTHOR("Qumranet");
62 MODULE_LICENSE("GPL"); 62 MODULE_LICENSE("GPL");
63 63
64 /* 64 /*
65 * Ordering of locks: 65 * Ordering of locks:
66 * 66 *
67 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock 67 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
68 */ 68 */
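/*
 * Illustrative nesting only (not part of this patch; the function name
 * is hypothetical): when more than one of the locks above is needed,
 * they are taken in the documented order and released in reverse.
 */
static void example_ordered_locking(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	mutex_lock(&kvm->slots_lock);
	mutex_lock(&kvm->irq_lock);
	/* ... work that needs all three locks ... */
	mutex_unlock(&kvm->irq_lock);
	mutex_unlock(&kvm->slots_lock);
	mutex_unlock(&kvm->lock);
}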
69 69
70 DEFINE_SPINLOCK(kvm_lock); 70 DEFINE_SPINLOCK(kvm_lock);
71 LIST_HEAD(vm_list); 71 LIST_HEAD(vm_list);
72 72
73 static cpumask_var_t cpus_hardware_enabled; 73 static cpumask_var_t cpus_hardware_enabled;
74 static int kvm_usage_count = 0; 74 static int kvm_usage_count = 0;
75 static atomic_t hardware_enable_failed; 75 static atomic_t hardware_enable_failed;
76 76
77 struct kmem_cache *kvm_vcpu_cache; 77 struct kmem_cache *kvm_vcpu_cache;
78 EXPORT_SYMBOL_GPL(kvm_vcpu_cache); 78 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
79 79
80 static __read_mostly struct preempt_ops kvm_preempt_ops; 80 static __read_mostly struct preempt_ops kvm_preempt_ops;
81 81
82 struct dentry *kvm_debugfs_dir; 82 struct dentry *kvm_debugfs_dir;
83 83
84 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 84 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
85 unsigned long arg); 85 unsigned long arg);
86 static int hardware_enable_all(void); 86 static int hardware_enable_all(void);
87 static void hardware_disable_all(void); 87 static void hardware_disable_all(void);
88 88
89 static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 89 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
90 90
91 static bool kvm_rebooting; 91 static bool kvm_rebooting;
92 92
93 static bool largepages_enabled = true; 93 static bool largepages_enabled = true;
94 94
95 inline int kvm_is_mmio_pfn(pfn_t pfn) 95 inline int kvm_is_mmio_pfn(pfn_t pfn)
96 { 96 {
97 if (pfn_valid(pfn)) { 97 if (pfn_valid(pfn)) {
98 struct page *page = compound_head(pfn_to_page(pfn)); 98 struct page *page = compound_head(pfn_to_page(pfn));
99 return PageReserved(page); 99 return PageReserved(page);
100 } 100 }
101 101
102 return true; 102 return true;
103 } 103 }
104 104
105 /* 105 /*
106 	 * Switches to the specified vcpu, until a matching vcpu_put() 106 	 * Switches to the specified vcpu, until a matching vcpu_put()
107 */ 107 */
108 void vcpu_load(struct kvm_vcpu *vcpu) 108 void vcpu_load(struct kvm_vcpu *vcpu)
109 { 109 {
110 int cpu; 110 int cpu;
111 111
112 mutex_lock(&vcpu->mutex); 112 mutex_lock(&vcpu->mutex);
113 cpu = get_cpu(); 113 cpu = get_cpu();
114 preempt_notifier_register(&vcpu->preempt_notifier); 114 preempt_notifier_register(&vcpu->preempt_notifier);
115 kvm_arch_vcpu_load(vcpu, cpu); 115 kvm_arch_vcpu_load(vcpu, cpu);
116 put_cpu(); 116 put_cpu();
117 } 117 }
118 118
119 void vcpu_put(struct kvm_vcpu *vcpu) 119 void vcpu_put(struct kvm_vcpu *vcpu)
120 { 120 {
121 preempt_disable(); 121 preempt_disable();
122 kvm_arch_vcpu_put(vcpu); 122 kvm_arch_vcpu_put(vcpu);
123 preempt_notifier_unregister(&vcpu->preempt_notifier); 123 preempt_notifier_unregister(&vcpu->preempt_notifier);
124 preempt_enable(); 124 preempt_enable();
125 mutex_unlock(&vcpu->mutex); 125 mutex_unlock(&vcpu->mutex);
126 } 126 }
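/*
 * Sketch of the usual calling pattern (illustrative only, not part of
 * this patch; the function name is hypothetical): per-vcpu operations
 * bracket their work with vcpu_load()/vcpu_put() so the vcpu state is
 * resident on the current cpu for the duration of the call.
 */
static long example_vcpu_operation(struct kvm_vcpu *vcpu)
{
	long r;

	vcpu_load(vcpu);
	r = kvm_arch_vcpu_runnable(vcpu);	/* any per-vcpu work */
	vcpu_put(vcpu);
	return r;
}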
127 127
128 static void ack_flush(void *_completed) 128 static void ack_flush(void *_completed)
129 { 129 {
130 } 130 }
131 131
132 static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) 132 static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
133 { 133 {
134 int i, cpu, me; 134 int i, cpu, me;
135 cpumask_var_t cpus; 135 cpumask_var_t cpus;
136 bool called = true; 136 bool called = true;
137 struct kvm_vcpu *vcpu; 137 struct kvm_vcpu *vcpu;
138 138
139 zalloc_cpumask_var(&cpus, GFP_ATOMIC); 139 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
140 140
141 raw_spin_lock(&kvm->requests_lock); 141 raw_spin_lock(&kvm->requests_lock);
142 me = smp_processor_id(); 142 me = smp_processor_id();
143 kvm_for_each_vcpu(i, vcpu, kvm) { 143 kvm_for_each_vcpu(i, vcpu, kvm) {
144 if (test_and_set_bit(req, &vcpu->requests)) 144 if (test_and_set_bit(req, &vcpu->requests))
145 continue; 145 continue;
146 cpu = vcpu->cpu; 146 cpu = vcpu->cpu;
147 if (cpus != NULL && cpu != -1 && cpu != me) 147 if (cpus != NULL && cpu != -1 && cpu != me)
148 cpumask_set_cpu(cpu, cpus); 148 cpumask_set_cpu(cpu, cpus);
149 } 149 }
150 if (unlikely(cpus == NULL)) 150 if (unlikely(cpus == NULL))
151 smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); 151 smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
152 else if (!cpumask_empty(cpus)) 152 else if (!cpumask_empty(cpus))
153 smp_call_function_many(cpus, ack_flush, NULL, 1); 153 smp_call_function_many(cpus, ack_flush, NULL, 1);
154 else 154 else
155 called = false; 155 called = false;
156 raw_spin_unlock(&kvm->requests_lock); 156 raw_spin_unlock(&kvm->requests_lock);
157 free_cpumask_var(cpus); 157 free_cpumask_var(cpus);
158 return called; 158 return called;
159 } 159 }
160 160
161 void kvm_flush_remote_tlbs(struct kvm *kvm) 161 void kvm_flush_remote_tlbs(struct kvm *kvm)
162 { 162 {
163 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 163 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
164 ++kvm->stat.remote_tlb_flush; 164 ++kvm->stat.remote_tlb_flush;
165 } 165 }
166 166
167 void kvm_reload_remote_mmus(struct kvm *kvm) 167 void kvm_reload_remote_mmus(struct kvm *kvm)
168 { 168 {
169 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 169 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
170 } 170 }
171 171
172 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 172 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
173 { 173 {
174 struct page *page; 174 struct page *page;
175 int r; 175 int r;
176 176
177 mutex_init(&vcpu->mutex); 177 mutex_init(&vcpu->mutex);
178 vcpu->cpu = -1; 178 vcpu->cpu = -1;
179 vcpu->kvm = kvm; 179 vcpu->kvm = kvm;
180 vcpu->vcpu_id = id; 180 vcpu->vcpu_id = id;
181 init_waitqueue_head(&vcpu->wq); 181 init_waitqueue_head(&vcpu->wq);
182 182
183 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 183 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
184 if (!page) { 184 if (!page) {
185 r = -ENOMEM; 185 r = -ENOMEM;
186 goto fail; 186 goto fail;
187 } 187 }
188 vcpu->run = page_address(page); 188 vcpu->run = page_address(page);
189 189
190 r = kvm_arch_vcpu_init(vcpu); 190 r = kvm_arch_vcpu_init(vcpu);
191 if (r < 0) 191 if (r < 0)
192 goto fail_free_run; 192 goto fail_free_run;
193 return 0; 193 return 0;
194 194
195 fail_free_run: 195 fail_free_run:
196 free_page((unsigned long)vcpu->run); 196 free_page((unsigned long)vcpu->run);
197 fail: 197 fail:
198 return r; 198 return r;
199 } 199 }
200 EXPORT_SYMBOL_GPL(kvm_vcpu_init); 200 EXPORT_SYMBOL_GPL(kvm_vcpu_init);
201 201
202 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) 202 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
203 { 203 {
204 kvm_arch_vcpu_uninit(vcpu); 204 kvm_arch_vcpu_uninit(vcpu);
205 free_page((unsigned long)vcpu->run); 205 free_page((unsigned long)vcpu->run);
206 } 206 }
207 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); 207 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
208 208
209 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 209 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
210 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) 210 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
211 { 211 {
212 return container_of(mn, struct kvm, mmu_notifier); 212 return container_of(mn, struct kvm, mmu_notifier);
213 } 213 }
214 214
215 static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, 215 static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
216 struct mm_struct *mm, 216 struct mm_struct *mm,
217 unsigned long address) 217 unsigned long address)
218 { 218 {
219 struct kvm *kvm = mmu_notifier_to_kvm(mn); 219 struct kvm *kvm = mmu_notifier_to_kvm(mn);
220 int need_tlb_flush, idx; 220 int need_tlb_flush, idx;
221 221
222 /* 222 /*
223 * When ->invalidate_page runs, the linux pte has been zapped 223 * When ->invalidate_page runs, the linux pte has been zapped
224 * already but the page is still allocated until 224 * already but the page is still allocated until
225 * ->invalidate_page returns. So if we increase the sequence 225 * ->invalidate_page returns. So if we increase the sequence
226 * here the kvm page fault will notice if the spte can't be 226 * here the kvm page fault will notice if the spte can't be
227 * established because the page is going to be freed. If 227 * established because the page is going to be freed. If
228 * instead the kvm page fault establishes the spte before 228 * instead the kvm page fault establishes the spte before
229 * ->invalidate_page runs, kvm_unmap_hva will release it 229 * ->invalidate_page runs, kvm_unmap_hva will release it
230 * before returning. 230 * before returning.
231 * 231 *
232 	 * The sequence increase only needs to be seen at spin_unlock 232 	 * The sequence increase only needs to be seen at spin_unlock
233 * time, and not at spin_lock time. 233 * time, and not at spin_lock time.
234 * 234 *
235 * Increasing the sequence after the spin_unlock would be 235 * Increasing the sequence after the spin_unlock would be
236 * unsafe because the kvm page fault could then establish the 236 * unsafe because the kvm page fault could then establish the
237 * pte after kvm_unmap_hva returned, without noticing the page 237 * pte after kvm_unmap_hva returned, without noticing the page
238 * is going to be freed. 238 * is going to be freed.
239 */ 239 */
240 idx = srcu_read_lock(&kvm->srcu); 240 idx = srcu_read_lock(&kvm->srcu);
241 spin_lock(&kvm->mmu_lock); 241 spin_lock(&kvm->mmu_lock);
242 kvm->mmu_notifier_seq++; 242 kvm->mmu_notifier_seq++;
243 need_tlb_flush = kvm_unmap_hva(kvm, address); 243 need_tlb_flush = kvm_unmap_hva(kvm, address);
244 spin_unlock(&kvm->mmu_lock); 244 spin_unlock(&kvm->mmu_lock);
245 srcu_read_unlock(&kvm->srcu, idx); 245 srcu_read_unlock(&kvm->srcu, idx);
246 246
247 	/* we have to flush the TLB before the pages can be freed */ 247 	/* we have to flush the TLB before the pages can be freed */
248 if (need_tlb_flush) 248 if (need_tlb_flush)
249 kvm_flush_remote_tlbs(kvm); 249 kvm_flush_remote_tlbs(kvm);
250 250
251 } 251 }
252 252
253 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, 253 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
254 struct mm_struct *mm, 254 struct mm_struct *mm,
255 unsigned long address, 255 unsigned long address,
256 pte_t pte) 256 pte_t pte)
257 { 257 {
258 struct kvm *kvm = mmu_notifier_to_kvm(mn); 258 struct kvm *kvm = mmu_notifier_to_kvm(mn);
259 int idx; 259 int idx;
260 260
261 idx = srcu_read_lock(&kvm->srcu); 261 idx = srcu_read_lock(&kvm->srcu);
262 spin_lock(&kvm->mmu_lock); 262 spin_lock(&kvm->mmu_lock);
263 kvm->mmu_notifier_seq++; 263 kvm->mmu_notifier_seq++;
264 kvm_set_spte_hva(kvm, address, pte); 264 kvm_set_spte_hva(kvm, address, pte);
265 spin_unlock(&kvm->mmu_lock); 265 spin_unlock(&kvm->mmu_lock);
266 srcu_read_unlock(&kvm->srcu, idx); 266 srcu_read_unlock(&kvm->srcu, idx);
267 } 267 }
268 268
269 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, 269 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
270 struct mm_struct *mm, 270 struct mm_struct *mm,
271 unsigned long start, 271 unsigned long start,
272 unsigned long end) 272 unsigned long end)
273 { 273 {
274 struct kvm *kvm = mmu_notifier_to_kvm(mn); 274 struct kvm *kvm = mmu_notifier_to_kvm(mn);
275 int need_tlb_flush = 0, idx; 275 int need_tlb_flush = 0, idx;
276 276
277 idx = srcu_read_lock(&kvm->srcu); 277 idx = srcu_read_lock(&kvm->srcu);
278 spin_lock(&kvm->mmu_lock); 278 spin_lock(&kvm->mmu_lock);
279 /* 279 /*
280 * The count increase must become visible at unlock time as no 280 * The count increase must become visible at unlock time as no
281 * spte can be established without taking the mmu_lock and 281 * spte can be established without taking the mmu_lock and
282 * count is also read inside the mmu_lock critical section. 282 * count is also read inside the mmu_lock critical section.
283 */ 283 */
284 kvm->mmu_notifier_count++; 284 kvm->mmu_notifier_count++;
285 for (; start < end; start += PAGE_SIZE) 285 for (; start < end; start += PAGE_SIZE)
286 need_tlb_flush |= kvm_unmap_hva(kvm, start); 286 need_tlb_flush |= kvm_unmap_hva(kvm, start);
287 spin_unlock(&kvm->mmu_lock); 287 spin_unlock(&kvm->mmu_lock);
288 srcu_read_unlock(&kvm->srcu, idx); 288 srcu_read_unlock(&kvm->srcu, idx);
289 289
290 	/* we have to flush the TLB before the pages can be freed */ 290 	/* we have to flush the TLB before the pages can be freed */
291 if (need_tlb_flush) 291 if (need_tlb_flush)
292 kvm_flush_remote_tlbs(kvm); 292 kvm_flush_remote_tlbs(kvm);
293 } 293 }
294 294
295 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, 295 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
296 struct mm_struct *mm, 296 struct mm_struct *mm,
297 unsigned long start, 297 unsigned long start,
298 unsigned long end) 298 unsigned long end)
299 { 299 {
300 struct kvm *kvm = mmu_notifier_to_kvm(mn); 300 struct kvm *kvm = mmu_notifier_to_kvm(mn);
301 301
302 spin_lock(&kvm->mmu_lock); 302 spin_lock(&kvm->mmu_lock);
303 /* 303 /*
304 * This sequence increase will notify the kvm page fault that 304 * This sequence increase will notify the kvm page fault that
305 * the page that is going to be mapped in the spte could have 305 * the page that is going to be mapped in the spte could have
306 * been freed. 306 * been freed.
307 */ 307 */
308 kvm->mmu_notifier_seq++; 308 kvm->mmu_notifier_seq++;
309 /* 309 /*
310 * The above sequence increase must be visible before the 310 * The above sequence increase must be visible before the
311 * below count decrease but both values are read by the kvm 311 * below count decrease but both values are read by the kvm
312 * page fault under mmu_lock spinlock so we don't need to add 312 * page fault under mmu_lock spinlock so we don't need to add
313 	 * an smp_wmb() here in between the two. 313 	 * an smp_wmb() here in between the two.
314 */ 314 */
315 kvm->mmu_notifier_count--; 315 kvm->mmu_notifier_count--;
316 spin_unlock(&kvm->mmu_lock); 316 spin_unlock(&kvm->mmu_lock);
317 317
318 BUG_ON(kvm->mmu_notifier_count < 0); 318 BUG_ON(kvm->mmu_notifier_count < 0);
319 } 319 }
320 320
321 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, 321 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
322 struct mm_struct *mm, 322 struct mm_struct *mm,
323 unsigned long address) 323 unsigned long address)
324 { 324 {
325 struct kvm *kvm = mmu_notifier_to_kvm(mn); 325 struct kvm *kvm = mmu_notifier_to_kvm(mn);
326 int young, idx; 326 int young, idx;
327 327
328 idx = srcu_read_lock(&kvm->srcu); 328 idx = srcu_read_lock(&kvm->srcu);
329 spin_lock(&kvm->mmu_lock); 329 spin_lock(&kvm->mmu_lock);
330 young = kvm_age_hva(kvm, address); 330 young = kvm_age_hva(kvm, address);
331 spin_unlock(&kvm->mmu_lock); 331 spin_unlock(&kvm->mmu_lock);
332 srcu_read_unlock(&kvm->srcu, idx); 332 srcu_read_unlock(&kvm->srcu, idx);
333 333
334 if (young) 334 if (young)
335 kvm_flush_remote_tlbs(kvm); 335 kvm_flush_remote_tlbs(kvm);
336 336
337 return young; 337 return young;
338 } 338 }
339 339
340 static void kvm_mmu_notifier_release(struct mmu_notifier *mn, 340 static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
341 struct mm_struct *mm) 341 struct mm_struct *mm)
342 { 342 {
343 struct kvm *kvm = mmu_notifier_to_kvm(mn); 343 struct kvm *kvm = mmu_notifier_to_kvm(mn);
344 kvm_arch_flush_shadow(kvm); 344 kvm_arch_flush_shadow(kvm);
345 } 345 }
346 346
347 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { 347 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
348 .invalidate_page = kvm_mmu_notifier_invalidate_page, 348 .invalidate_page = kvm_mmu_notifier_invalidate_page,
349 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, 349 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
350 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, 350 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
351 .clear_flush_young = kvm_mmu_notifier_clear_flush_young, 351 .clear_flush_young = kvm_mmu_notifier_clear_flush_young,
352 .change_pte = kvm_mmu_notifier_change_pte, 352 .change_pte = kvm_mmu_notifier_change_pte,
353 .release = kvm_mmu_notifier_release, 353 .release = kvm_mmu_notifier_release,
354 }; 354 };
355 355
356 static int kvm_init_mmu_notifier(struct kvm *kvm) 356 static int kvm_init_mmu_notifier(struct kvm *kvm)
357 { 357 {
358 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; 358 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
359 return mmu_notifier_register(&kvm->mmu_notifier, current->mm); 359 return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
360 } 360 }
361 361
362 #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ 362 #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
363 363
364 static int kvm_init_mmu_notifier(struct kvm *kvm) 364 static int kvm_init_mmu_notifier(struct kvm *kvm)
365 { 365 {
366 return 0; 366 return 0;
367 } 367 }
368 368
369 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ 369 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
370 370
371 static struct kvm *kvm_create_vm(void) 371 static struct kvm *kvm_create_vm(void)
372 { 372 {
373 int r = 0, i; 373 int r = 0, i;
374 struct kvm *kvm = kvm_arch_create_vm(); 374 struct kvm *kvm = kvm_arch_create_vm();
375 375
376 if (IS_ERR(kvm)) 376 if (IS_ERR(kvm))
377 goto out; 377 goto out;
378 378
379 r = hardware_enable_all(); 379 r = hardware_enable_all();
380 if (r) 380 if (r)
381 goto out_err_nodisable; 381 goto out_err_nodisable;
382 382
383 #ifdef CONFIG_HAVE_KVM_IRQCHIP 383 #ifdef CONFIG_HAVE_KVM_IRQCHIP
384 INIT_HLIST_HEAD(&kvm->mask_notifier_list); 384 INIT_HLIST_HEAD(&kvm->mask_notifier_list);
385 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 385 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
386 #endif 386 #endif
387 387
388 r = -ENOMEM; 388 r = -ENOMEM;
389 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 389 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
390 if (!kvm->memslots) 390 if (!kvm->memslots)
391 goto out_err; 391 goto out_err;
392 if (init_srcu_struct(&kvm->srcu)) 392 if (init_srcu_struct(&kvm->srcu))
393 goto out_err; 393 goto out_err;
394 for (i = 0; i < KVM_NR_BUSES; i++) { 394 for (i = 0; i < KVM_NR_BUSES; i++) {
395 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), 395 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
396 GFP_KERNEL); 396 GFP_KERNEL);
397 if (!kvm->buses[i]) { 397 if (!kvm->buses[i]) {
398 cleanup_srcu_struct(&kvm->srcu); 398 cleanup_srcu_struct(&kvm->srcu);
399 goto out_err; 399 goto out_err;
400 } 400 }
401 } 401 }
402 402
403 r = kvm_init_mmu_notifier(kvm); 403 r = kvm_init_mmu_notifier(kvm);
404 if (r) { 404 if (r) {
405 cleanup_srcu_struct(&kvm->srcu); 405 cleanup_srcu_struct(&kvm->srcu);
406 goto out_err; 406 goto out_err;
407 } 407 }
408 408
409 kvm->mm = current->mm; 409 kvm->mm = current->mm;
410 atomic_inc(&kvm->mm->mm_count); 410 atomic_inc(&kvm->mm->mm_count);
411 spin_lock_init(&kvm->mmu_lock); 411 spin_lock_init(&kvm->mmu_lock);
412 raw_spin_lock_init(&kvm->requests_lock); 412 raw_spin_lock_init(&kvm->requests_lock);
413 kvm_eventfd_init(kvm); 413 kvm_eventfd_init(kvm);
414 mutex_init(&kvm->lock); 414 mutex_init(&kvm->lock);
415 mutex_init(&kvm->irq_lock); 415 mutex_init(&kvm->irq_lock);
416 mutex_init(&kvm->slots_lock); 416 mutex_init(&kvm->slots_lock);
417 atomic_set(&kvm->users_count, 1); 417 atomic_set(&kvm->users_count, 1);
418 spin_lock(&kvm_lock); 418 spin_lock(&kvm_lock);
419 list_add(&kvm->vm_list, &vm_list); 419 list_add(&kvm->vm_list, &vm_list);
420 spin_unlock(&kvm_lock); 420 spin_unlock(&kvm_lock);
421 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 421 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
422 kvm_coalesced_mmio_init(kvm); 422 kvm_coalesced_mmio_init(kvm);
423 #endif 423 #endif
424 out: 424 out:
425 return kvm; 425 return kvm;
426 426
427 out_err: 427 out_err:
428 hardware_disable_all(); 428 hardware_disable_all();
429 out_err_nodisable: 429 out_err_nodisable:
430 for (i = 0; i < KVM_NR_BUSES; i++) 430 for (i = 0; i < KVM_NR_BUSES; i++)
431 kfree(kvm->buses[i]); 431 kfree(kvm->buses[i]);
432 kfree(kvm->memslots); 432 kfree(kvm->memslots);
433 kfree(kvm); 433 kfree(kvm);
434 return ERR_PTR(r); 434 return ERR_PTR(r);
435 } 435 }
436 436
437 /* 437 /*
438 * Free any memory in @free but not in @dont. 438 * Free any memory in @free but not in @dont.
439 */ 439 */
440 static void kvm_free_physmem_slot(struct kvm_memory_slot *free, 440 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
441 struct kvm_memory_slot *dont) 441 struct kvm_memory_slot *dont)
442 { 442 {
443 int i; 443 int i;
444 444
445 if (!dont || free->rmap != dont->rmap) 445 if (!dont || free->rmap != dont->rmap)
446 vfree(free->rmap); 446 vfree(free->rmap);
447 447
448 if (!dont || free->dirty_bitmap != dont->dirty_bitmap) 448 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
449 vfree(free->dirty_bitmap); 449 vfree(free->dirty_bitmap);
450 450
451 451
452 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { 452 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
453 if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { 453 if (!dont || free->lpage_info[i] != dont->lpage_info[i]) {
454 vfree(free->lpage_info[i]); 454 vfree(free->lpage_info[i]);
455 free->lpage_info[i] = NULL; 455 free->lpage_info[i] = NULL;
456 } 456 }
457 } 457 }
458 458
459 free->npages = 0; 459 free->npages = 0;
460 free->dirty_bitmap = NULL; 460 free->dirty_bitmap = NULL;
461 free->rmap = NULL; 461 free->rmap = NULL;
462 } 462 }
463 463
464 void kvm_free_physmem(struct kvm *kvm) 464 void kvm_free_physmem(struct kvm *kvm)
465 { 465 {
466 int i; 466 int i;
467 struct kvm_memslots *slots = kvm->memslots; 467 struct kvm_memslots *slots = kvm->memslots;
468 468
469 for (i = 0; i < slots->nmemslots; ++i) 469 for (i = 0; i < slots->nmemslots; ++i)
470 kvm_free_physmem_slot(&slots->memslots[i], NULL); 470 kvm_free_physmem_slot(&slots->memslots[i], NULL);
471 471
472 kfree(kvm->memslots); 472 kfree(kvm->memslots);
473 } 473 }
474 474
475 static void kvm_destroy_vm(struct kvm *kvm) 475 static void kvm_destroy_vm(struct kvm *kvm)
476 { 476 {
477 int i; 477 int i;
478 struct mm_struct *mm = kvm->mm; 478 struct mm_struct *mm = kvm->mm;
479 479
480 kvm_arch_sync_events(kvm); 480 kvm_arch_sync_events(kvm);
481 spin_lock(&kvm_lock); 481 spin_lock(&kvm_lock);
482 list_del(&kvm->vm_list); 482 list_del(&kvm->vm_list);
483 spin_unlock(&kvm_lock); 483 spin_unlock(&kvm_lock);
484 kvm_free_irq_routing(kvm); 484 kvm_free_irq_routing(kvm);
485 for (i = 0; i < KVM_NR_BUSES; i++) 485 for (i = 0; i < KVM_NR_BUSES; i++)
486 kvm_io_bus_destroy(kvm->buses[i]); 486 kvm_io_bus_destroy(kvm->buses[i]);
487 kvm_coalesced_mmio_free(kvm); 487 kvm_coalesced_mmio_free(kvm);
488 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 488 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
489 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); 489 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
490 #else 490 #else
491 kvm_arch_flush_shadow(kvm); 491 kvm_arch_flush_shadow(kvm);
492 #endif 492 #endif
493 kvm_arch_destroy_vm(kvm); 493 kvm_arch_destroy_vm(kvm);
494 hardware_disable_all(); 494 hardware_disable_all();
495 mmdrop(mm); 495 mmdrop(mm);
496 } 496 }
497 497
498 void kvm_get_kvm(struct kvm *kvm) 498 void kvm_get_kvm(struct kvm *kvm)
499 { 499 {
500 atomic_inc(&kvm->users_count); 500 atomic_inc(&kvm->users_count);
501 } 501 }
502 EXPORT_SYMBOL_GPL(kvm_get_kvm); 502 EXPORT_SYMBOL_GPL(kvm_get_kvm);
503 503
504 void kvm_put_kvm(struct kvm *kvm) 504 void kvm_put_kvm(struct kvm *kvm)
505 { 505 {
506 if (atomic_dec_and_test(&kvm->users_count)) 506 if (atomic_dec_and_test(&kvm->users_count))
507 kvm_destroy_vm(kvm); 507 kvm_destroy_vm(kvm);
508 } 508 }
509 EXPORT_SYMBOL_GPL(kvm_put_kvm); 509 EXPORT_SYMBOL_GPL(kvm_put_kvm);
510 510
511 511
512 static int kvm_vm_release(struct inode *inode, struct file *filp) 512 static int kvm_vm_release(struct inode *inode, struct file *filp)
513 { 513 {
514 struct kvm *kvm = filp->private_data; 514 struct kvm *kvm = filp->private_data;
515 515
516 kvm_irqfd_release(kvm); 516 kvm_irqfd_release(kvm);
517 517
518 kvm_put_kvm(kvm); 518 kvm_put_kvm(kvm);
519 return 0; 519 return 0;
520 } 520 }
521 521
522 /* 522 /*
523 * Allocate some memory and give it an address in the guest physical address 523 * Allocate some memory and give it an address in the guest physical address
524 * space. 524 * space.
525 * 525 *
526 * Discontiguous memory is allowed, mostly for framebuffers. 526 * Discontiguous memory is allowed, mostly for framebuffers.
527 * 527 *
528 * Must be called holding mmap_sem for write. 528 * Must be called holding mmap_sem for write.
529 */ 529 */
530 int __kvm_set_memory_region(struct kvm *kvm, 530 int __kvm_set_memory_region(struct kvm *kvm,
531 struct kvm_userspace_memory_region *mem, 531 struct kvm_userspace_memory_region *mem,
532 int user_alloc) 532 int user_alloc)
533 { 533 {
534 int r, flush_shadow = 0; 534 int r, flush_shadow = 0;
535 gfn_t base_gfn; 535 gfn_t base_gfn;
536 unsigned long npages; 536 unsigned long npages;
537 unsigned long i; 537 unsigned long i;
538 struct kvm_memory_slot *memslot; 538 struct kvm_memory_slot *memslot;
539 struct kvm_memory_slot old, new; 539 struct kvm_memory_slot old, new;
540 struct kvm_memslots *slots, *old_memslots; 540 struct kvm_memslots *slots, *old_memslots;
541 541
542 r = -EINVAL; 542 r = -EINVAL;
543 /* General sanity checks */ 543 /* General sanity checks */
544 if (mem->memory_size & (PAGE_SIZE - 1)) 544 if (mem->memory_size & (PAGE_SIZE - 1))
545 goto out; 545 goto out;
546 if (mem->guest_phys_addr & (PAGE_SIZE - 1)) 546 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
547 goto out; 547 goto out;
548 if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) 548 if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
549 goto out; 549 goto out;
550 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) 550 if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
551 goto out; 551 goto out;
552 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) 552 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
553 goto out; 553 goto out;
554 554
555 memslot = &kvm->memslots->memslots[mem->slot]; 555 memslot = &kvm->memslots->memslots[mem->slot];
556 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 556 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
557 npages = mem->memory_size >> PAGE_SHIFT; 557 npages = mem->memory_size >> PAGE_SHIFT;
558 558
559 if (!npages) 559 if (!npages)
560 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; 560 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
561 561
562 new = old = *memslot; 562 new = old = *memslot;
563 563
564 new.base_gfn = base_gfn; 564 new.base_gfn = base_gfn;
565 new.npages = npages; 565 new.npages = npages;
566 new.flags = mem->flags; 566 new.flags = mem->flags;
567 567
568 /* Disallow changing a memory slot's size. */ 568 /* Disallow changing a memory slot's size. */
569 r = -EINVAL; 569 r = -EINVAL;
570 if (npages && old.npages && npages != old.npages) 570 if (npages && old.npages && npages != old.npages)
571 goto out_free; 571 goto out_free;
572 572
573 /* Check for overlaps */ 573 /* Check for overlaps */
574 r = -EEXIST; 574 r = -EEXIST;
575 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 575 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
576 struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; 576 struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
577 577
578 if (s == memslot || !s->npages) 578 if (s == memslot || !s->npages)
579 continue; 579 continue;
580 if (!((base_gfn + npages <= s->base_gfn) || 580 if (!((base_gfn + npages <= s->base_gfn) ||
581 (base_gfn >= s->base_gfn + s->npages))) 581 (base_gfn >= s->base_gfn + s->npages)))
582 goto out_free; 582 goto out_free;
583 } 583 }
584 584
585 /* Free page dirty bitmap if unneeded */ 585 /* Free page dirty bitmap if unneeded */
586 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) 586 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
587 new.dirty_bitmap = NULL; 587 new.dirty_bitmap = NULL;
588 588
589 r = -ENOMEM; 589 r = -ENOMEM;
590 590
591 /* Allocate if a slot is being created */ 591 /* Allocate if a slot is being created */
592 #ifndef CONFIG_S390 592 #ifndef CONFIG_S390
593 if (npages && !new.rmap) { 593 if (npages && !new.rmap) {
594 new.rmap = vmalloc(npages * sizeof(struct page *)); 594 new.rmap = vmalloc(npages * sizeof(struct page *));
595 595
596 if (!new.rmap) 596 if (!new.rmap)
597 goto out_free; 597 goto out_free;
598 598
599 memset(new.rmap, 0, npages * sizeof(*new.rmap)); 599 memset(new.rmap, 0, npages * sizeof(*new.rmap));
600 600
601 new.user_alloc = user_alloc; 601 new.user_alloc = user_alloc;
602 new.userspace_addr = mem->userspace_addr; 602 new.userspace_addr = mem->userspace_addr;
603 } 603 }
604 if (!npages) 604 if (!npages)
605 goto skip_lpage; 605 goto skip_lpage;
606 606
607 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { 607 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
608 unsigned long ugfn; 608 unsigned long ugfn;
609 unsigned long j; 609 unsigned long j;
610 int lpages; 610 int lpages;
611 int level = i + 2; 611 int level = i + 2;
612 612
613 /* Avoid unused variable warning if no large pages */ 613 /* Avoid unused variable warning if no large pages */
614 (void)level; 614 (void)level;
615 615
616 if (new.lpage_info[i]) 616 if (new.lpage_info[i])
617 continue; 617 continue;
618 618
619 lpages = 1 + (base_gfn + npages - 1) / 619 lpages = 1 + (base_gfn + npages - 1) /
620 KVM_PAGES_PER_HPAGE(level); 620 KVM_PAGES_PER_HPAGE(level);
621 lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); 621 lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
622 622
623 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); 623 new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
624 624
625 if (!new.lpage_info[i]) 625 if (!new.lpage_info[i])
626 goto out_free; 626 goto out_free;
627 627
628 memset(new.lpage_info[i], 0, 628 memset(new.lpage_info[i], 0,
629 lpages * sizeof(*new.lpage_info[i])); 629 lpages * sizeof(*new.lpage_info[i]));
630 630
631 if (base_gfn % KVM_PAGES_PER_HPAGE(level)) 631 if (base_gfn % KVM_PAGES_PER_HPAGE(level))
632 new.lpage_info[i][0].write_count = 1; 632 new.lpage_info[i][0].write_count = 1;
633 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) 633 if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
634 new.lpage_info[i][lpages - 1].write_count = 1; 634 new.lpage_info[i][lpages - 1].write_count = 1;
635 ugfn = new.userspace_addr >> PAGE_SHIFT; 635 ugfn = new.userspace_addr >> PAGE_SHIFT;
636 /* 636 /*
637 * If the gfn and userspace address are not aligned wrt each 637 * If the gfn and userspace address are not aligned wrt each
638 * other, or if explicitly asked to, disable large page 638 * other, or if explicitly asked to, disable large page
639 * support for this slot 639 * support for this slot
640 */ 640 */
641 if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || 641 if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
642 !largepages_enabled) 642 !largepages_enabled)
643 for (j = 0; j < lpages; ++j) 643 for (j = 0; j < lpages; ++j)
644 new.lpage_info[i][j].write_count = 1; 644 new.lpage_info[i][j].write_count = 1;
645 } 645 }
646 646
647 skip_lpage: 647 skip_lpage:
648 648
649 /* Allocate page dirty bitmap if needed */ 649 /* Allocate page dirty bitmap if needed */
650 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { 650 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
651 unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; 651 unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
652 652
653 new.dirty_bitmap = vmalloc(dirty_bytes); 653 new.dirty_bitmap = vmalloc(dirty_bytes);
654 if (!new.dirty_bitmap) 654 if (!new.dirty_bitmap)
655 goto out_free; 655 goto out_free;
656 memset(new.dirty_bitmap, 0, dirty_bytes); 656 memset(new.dirty_bitmap, 0, dirty_bytes);
657 /* destroy any largepage mappings for dirty tracking */ 657 /* destroy any largepage mappings for dirty tracking */
658 if (old.npages) 658 if (old.npages)
659 flush_shadow = 1; 659 flush_shadow = 1;
660 } 660 }
661 #else /* not defined CONFIG_S390 */ 661 #else /* not defined CONFIG_S390 */
662 new.user_alloc = user_alloc; 662 new.user_alloc = user_alloc;
663 if (user_alloc) 663 if (user_alloc)
664 new.userspace_addr = mem->userspace_addr; 664 new.userspace_addr = mem->userspace_addr;
665 #endif /* not defined CONFIG_S390 */ 665 #endif /* not defined CONFIG_S390 */
666 666
667 if (!npages) { 667 if (!npages) {
668 r = -ENOMEM; 668 r = -ENOMEM;
669 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 669 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
670 if (!slots) 670 if (!slots)
671 goto out_free; 671 goto out_free;
672 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 672 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
673 if (mem->slot >= slots->nmemslots) 673 if (mem->slot >= slots->nmemslots)
674 slots->nmemslots = mem->slot + 1; 674 slots->nmemslots = mem->slot + 1;
675 slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; 675 slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
676 676
677 old_memslots = kvm->memslots; 677 old_memslots = kvm->memslots;
678 rcu_assign_pointer(kvm->memslots, slots); 678 rcu_assign_pointer(kvm->memslots, slots);
679 synchronize_srcu_expedited(&kvm->srcu); 679 synchronize_srcu_expedited(&kvm->srcu);
680 /* From this point no new shadow pages pointing to a deleted 680 /* From this point no new shadow pages pointing to a deleted
681 * memslot will be created. 681 * memslot will be created.
682 * 682 *
683 * validation of sp->gfn happens in: 683 * validation of sp->gfn happens in:
684 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) 684 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
685 * - kvm_is_visible_gfn (mmu_check_roots) 685 * - kvm_is_visible_gfn (mmu_check_roots)
686 */ 686 */
687 kvm_arch_flush_shadow(kvm); 687 kvm_arch_flush_shadow(kvm);
688 kfree(old_memslots); 688 kfree(old_memslots);
689 } 689 }
690 690
691 r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); 691 r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
692 if (r) 692 if (r)
693 goto out_free; 693 goto out_free;
694 694
695 #ifdef CONFIG_DMAR 695 #ifdef CONFIG_DMAR
696 /* map the pages in iommu page table */ 696 /* map the pages in iommu page table */
697 if (npages) { 697 if (npages) {
698 r = kvm_iommu_map_pages(kvm, &new); 698 r = kvm_iommu_map_pages(kvm, &new);
699 if (r) 699 if (r)
700 goto out_free; 700 goto out_free;
701 } 701 }
702 #endif 702 #endif
703 703
704 r = -ENOMEM; 704 r = -ENOMEM;
705 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 705 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
706 if (!slots) 706 if (!slots)
707 goto out_free; 707 goto out_free;
708 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); 708 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
709 if (mem->slot >= slots->nmemslots) 709 if (mem->slot >= slots->nmemslots)
710 slots->nmemslots = mem->slot + 1; 710 slots->nmemslots = mem->slot + 1;
711 711
712 /* actual memory is freed via old in kvm_free_physmem_slot below */ 712 /* actual memory is freed via old in kvm_free_physmem_slot below */
713 if (!npages) { 713 if (!npages) {
714 new.rmap = NULL; 714 new.rmap = NULL;
715 new.dirty_bitmap = NULL; 715 new.dirty_bitmap = NULL;
716 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) 716 for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
717 new.lpage_info[i] = NULL; 717 new.lpage_info[i] = NULL;
718 } 718 }
719 719
720 slots->memslots[mem->slot] = new; 720 slots->memslots[mem->slot] = new;
721 old_memslots = kvm->memslots; 721 old_memslots = kvm->memslots;
722 rcu_assign_pointer(kvm->memslots, slots); 722 rcu_assign_pointer(kvm->memslots, slots);
723 synchronize_srcu_expedited(&kvm->srcu); 723 synchronize_srcu_expedited(&kvm->srcu);
724 724
725 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); 725 kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
726 726
727 kvm_free_physmem_slot(&old, &new); 727 kvm_free_physmem_slot(&old, &new);
728 kfree(old_memslots); 728 kfree(old_memslots);
729 729
730 if (flush_shadow) 730 if (flush_shadow)
731 kvm_arch_flush_shadow(kvm); 731 kvm_arch_flush_shadow(kvm);
732 732
733 return 0; 733 return 0;
734 734
735 out_free: 735 out_free:
736 kvm_free_physmem_slot(&new, &old); 736 kvm_free_physmem_slot(&new, &old);
737 out: 737 out:
738 return r; 738 return r;
739 739
740 } 740 }
741 EXPORT_SYMBOL_GPL(__kvm_set_memory_region); 741 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
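The dirty-bitmap size above is now taken from kvm_dirty_bitmap_bytes(), the wrapper this patch introduces in one of the other changed files (likely include/linux/kvm_host.h, not shown in this hunk). Judging from the expression it replaces, a minimal sketch is the same calculation done in unsigned long rather than int:

static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
        /* one bit per guest page, rounded up to a whole number of longs */
        return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}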
742 742
743 int kvm_set_memory_region(struct kvm *kvm, 743 int kvm_set_memory_region(struct kvm *kvm,
744 struct kvm_userspace_memory_region *mem, 744 struct kvm_userspace_memory_region *mem,
745 int user_alloc) 745 int user_alloc)
746 { 746 {
747 int r; 747 int r;
748 748
749 mutex_lock(&kvm->slots_lock); 749 mutex_lock(&kvm->slots_lock);
750 r = __kvm_set_memory_region(kvm, mem, user_alloc); 750 r = __kvm_set_memory_region(kvm, mem, user_alloc);
751 mutex_unlock(&kvm->slots_lock); 751 mutex_unlock(&kvm->slots_lock);
752 return r; 752 return r;
753 } 753 }
754 EXPORT_SYMBOL_GPL(kvm_set_memory_region); 754 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
755 755
756 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, 756 int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
757 struct 757 struct
758 kvm_userspace_memory_region *mem, 758 kvm_userspace_memory_region *mem,
759 int user_alloc) 759 int user_alloc)
760 { 760 {
761 if (mem->slot >= KVM_MEMORY_SLOTS) 761 if (mem->slot >= KVM_MEMORY_SLOTS)
762 return -EINVAL; 762 return -EINVAL;
763 return kvm_set_memory_region(kvm, mem, user_alloc); 763 return kvm_set_memory_region(kvm, mem, user_alloc);
764 } 764 }
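A hedged userspace counterpart of this ioctl path: registering a memory slot with KVM_SET_USER_MEMORY_REGION and enabling dirty logging so the KVM_GET_DIRTY_LOG path below has a bitmap to report. The helper name set_slot0 and the slot/address choices are illustrative only.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_slot0(int vm_fd, void *host_mem, unsigned long long size)
{
        struct kvm_userspace_memory_region region = {
                .slot = 0,
                .flags = KVM_MEM_LOG_DIRTY_PAGES,
                .guest_phys_addr = 0,
                .memory_size = size,    /* must be page-aligned, per the sanity checks above */
                .userspace_addr = (unsigned long long)(unsigned long)host_mem,
        };

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}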
765 765
766 int kvm_get_dirty_log(struct kvm *kvm, 766 int kvm_get_dirty_log(struct kvm *kvm,
767 struct kvm_dirty_log *log, int *is_dirty) 767 struct kvm_dirty_log *log, int *is_dirty)
768 { 768 {
769 struct kvm_memory_slot *memslot; 769 struct kvm_memory_slot *memslot;
770 int r, i; 770 int r, i;
771 int n; 771 unsigned long n;
772 unsigned long any = 0; 772 unsigned long any = 0;
773 773
774 r = -EINVAL; 774 r = -EINVAL;
775 if (log->slot >= KVM_MEMORY_SLOTS) 775 if (log->slot >= KVM_MEMORY_SLOTS)
776 goto out; 776 goto out;
777 777
778 memslot = &kvm->memslots->memslots[log->slot]; 778 memslot = &kvm->memslots->memslots[log->slot];
779 r = -ENOENT; 779 r = -ENOENT;
780 if (!memslot->dirty_bitmap) 780 if (!memslot->dirty_bitmap)
781 goto out; 781 goto out;
782 782
783 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 783 n = kvm_dirty_bitmap_bytes(memslot);
784 784
785 for (i = 0; !any && i < n/sizeof(long); ++i) 785 for (i = 0; !any && i < n/sizeof(long); ++i)
786 any = memslot->dirty_bitmap[i]; 786 any = memslot->dirty_bitmap[i];
787 787
788 r = -EFAULT; 788 r = -EFAULT;
789 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 789 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
790 goto out; 790 goto out;
791 791
792 if (any) 792 if (any)
793 *is_dirty = 1; 793 *is_dirty = 1;
794 794
795 r = 0; 795 r = 0;
796 out: 796 out:
797 return r; 797 return r;
798 } 798 }
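For context, a minimal userspace sketch of the consumer side of kvm_get_dirty_log(): allocate a bitmap of the same size the kernel computes (one bit per page, rounded up to whole longs; the arithmetic below assumes a 64-bit host) and pass it to the KVM_GET_DIRTY_LOG ioctl. fetch_dirty_bitmap is a hypothetical helper, not part of this file.

#include <linux/kvm.h>
#include <stdlib.h>
#include <sys/ioctl.h>

static unsigned long *fetch_dirty_bitmap(int vm_fd, __u32 slot, unsigned long npages)
{
        struct kvm_dirty_log log = { .slot = slot };
        unsigned long bytes = ((npages + 63) / 64) * 8;   /* ALIGN(npages, 64) / 8 */
        unsigned long *bitmap = calloc(1, bytes);

        if (!bitmap)
                return NULL;

        log.dirty_bitmap = bitmap;
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                free(bitmap);
                return NULL;
        }
        return bitmap;          /* caller frees */
}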
799 799
800 void kvm_disable_largepages(void) 800 void kvm_disable_largepages(void)
801 { 801 {
802 largepages_enabled = false; 802 largepages_enabled = false;
803 } 803 }
804 EXPORT_SYMBOL_GPL(kvm_disable_largepages); 804 EXPORT_SYMBOL_GPL(kvm_disable_largepages);
805 805
806 int is_error_page(struct page *page) 806 int is_error_page(struct page *page)
807 { 807 {
808 return page == bad_page; 808 return page == bad_page;
809 } 809 }
810 EXPORT_SYMBOL_GPL(is_error_page); 810 EXPORT_SYMBOL_GPL(is_error_page);
811 811
812 int is_error_pfn(pfn_t pfn) 812 int is_error_pfn(pfn_t pfn)
813 { 813 {
814 return pfn == bad_pfn; 814 return pfn == bad_pfn;
815 } 815 }
816 EXPORT_SYMBOL_GPL(is_error_pfn); 816 EXPORT_SYMBOL_GPL(is_error_pfn);
817 817
818 static inline unsigned long bad_hva(void) 818 static inline unsigned long bad_hva(void)
819 { 819 {
820 return PAGE_OFFSET; 820 return PAGE_OFFSET;
821 } 821 }
822 822
823 int kvm_is_error_hva(unsigned long addr) 823 int kvm_is_error_hva(unsigned long addr)
824 { 824 {
825 return addr == bad_hva(); 825 return addr == bad_hva();
826 } 826 }
827 EXPORT_SYMBOL_GPL(kvm_is_error_hva); 827 EXPORT_SYMBOL_GPL(kvm_is_error_hva);
828 828
829 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 829 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
830 { 830 {
831 int i; 831 int i;
832 struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 832 struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
833 833
834 for (i = 0; i < slots->nmemslots; ++i) { 834 for (i = 0; i < slots->nmemslots; ++i) {
835 struct kvm_memory_slot *memslot = &slots->memslots[i]; 835 struct kvm_memory_slot *memslot = &slots->memslots[i];
836 836
837 if (gfn >= memslot->base_gfn 837 if (gfn >= memslot->base_gfn
838 && gfn < memslot->base_gfn + memslot->npages) 838 && gfn < memslot->base_gfn + memslot->npages)
839 return memslot; 839 return memslot;
840 } 840 }
841 return NULL; 841 return NULL;
842 } 842 }
843 EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); 843 EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
844 844
845 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) 845 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
846 { 846 {
847 gfn = unalias_gfn(kvm, gfn); 847 gfn = unalias_gfn(kvm, gfn);
848 return gfn_to_memslot_unaliased(kvm, gfn); 848 return gfn_to_memslot_unaliased(kvm, gfn);
849 } 849 }
850 850
851 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 851 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
852 { 852 {
853 int i; 853 int i;
854 struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 854 struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
855 855
856 gfn = unalias_gfn_instantiation(kvm, gfn); 856 gfn = unalias_gfn_instantiation(kvm, gfn);
857 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 857 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
858 struct kvm_memory_slot *memslot = &slots->memslots[i]; 858 struct kvm_memory_slot *memslot = &slots->memslots[i];
859 859
860 if (memslot->flags & KVM_MEMSLOT_INVALID) 860 if (memslot->flags & KVM_MEMSLOT_INVALID)
861 continue; 861 continue;
862 862
863 if (gfn >= memslot->base_gfn 863 if (gfn >= memslot->base_gfn
864 && gfn < memslot->base_gfn + memslot->npages) 864 && gfn < memslot->base_gfn + memslot->npages)
865 return 1; 865 return 1;
866 } 866 }
867 return 0; 867 return 0;
868 } 868 }
869 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); 869 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
870 870
871 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) 871 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
872 { 872 {
873 struct vm_area_struct *vma; 873 struct vm_area_struct *vma;
874 unsigned long addr, size; 874 unsigned long addr, size;
875 875
876 size = PAGE_SIZE; 876 size = PAGE_SIZE;
877 877
878 addr = gfn_to_hva(kvm, gfn); 878 addr = gfn_to_hva(kvm, gfn);
879 if (kvm_is_error_hva(addr)) 879 if (kvm_is_error_hva(addr))
880 return PAGE_SIZE; 880 return PAGE_SIZE;
881 881
882 down_read(&current->mm->mmap_sem); 882 down_read(&current->mm->mmap_sem);
883 vma = find_vma(current->mm, addr); 883 vma = find_vma(current->mm, addr);
884 if (!vma) 884 if (!vma)
885 goto out; 885 goto out;
886 886
887 size = vma_kernel_pagesize(vma); 887 size = vma_kernel_pagesize(vma);
888 888
889 out: 889 out:
890 up_read(&current->mm->mmap_sem); 890 up_read(&current->mm->mmap_sem);
891 891
892 return size; 892 return size;
893 } 893 }
894 894
895 int memslot_id(struct kvm *kvm, gfn_t gfn) 895 int memslot_id(struct kvm *kvm, gfn_t gfn)
896 { 896 {
897 int i; 897 int i;
898 struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 898 struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
899 struct kvm_memory_slot *memslot = NULL; 899 struct kvm_memory_slot *memslot = NULL;
900 900
901 gfn = unalias_gfn(kvm, gfn); 901 gfn = unalias_gfn(kvm, gfn);
902 for (i = 0; i < slots->nmemslots; ++i) { 902 for (i = 0; i < slots->nmemslots; ++i) {
903 memslot = &slots->memslots[i]; 903 memslot = &slots->memslots[i];
904 904
905 if (gfn >= memslot->base_gfn 905 if (gfn >= memslot->base_gfn
906 && gfn < memslot->base_gfn + memslot->npages) 906 && gfn < memslot->base_gfn + memslot->npages)
907 break; 907 break;
908 } 908 }
909 909
910 return memslot - slots->memslots; 910 return memslot - slots->memslots;
911 } 911 }
912 912
913 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 913 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
914 { 914 {
915 struct kvm_memory_slot *slot; 915 struct kvm_memory_slot *slot;
916 916
917 gfn = unalias_gfn_instantiation(kvm, gfn); 917 gfn = unalias_gfn_instantiation(kvm, gfn);
918 slot = gfn_to_memslot_unaliased(kvm, gfn); 918 slot = gfn_to_memslot_unaliased(kvm, gfn);
919 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 919 if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
920 return bad_hva(); 920 return bad_hva();
921 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 921 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
922 } 922 }
923 EXPORT_SYMBOL_GPL(gfn_to_hva); 923 EXPORT_SYMBOL_GPL(gfn_to_hva);
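As a concrete example of the translation above: with PAGE_SIZE 4096, a slot whose base_gfn is 0x1000 and whose userspace_addr is 0x7f0000000000 maps gfn 0x1003 to hva 0x7f0000000000 + (0x1003 - 0x1000) * 4096 = 0x7f0000003000 (addresses chosen purely for illustration).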
924 924
925 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) 925 static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
926 { 926 {
927 struct page *page[1]; 927 struct page *page[1];
928 int npages; 928 int npages;
929 pfn_t pfn; 929 pfn_t pfn;
930 930
931 might_sleep(); 931 might_sleep();
932 932
933 npages = get_user_pages_fast(addr, 1, 1, page); 933 npages = get_user_pages_fast(addr, 1, 1, page);
934 934
935 if (unlikely(npages != 1)) { 935 if (unlikely(npages != 1)) {
936 struct vm_area_struct *vma; 936 struct vm_area_struct *vma;
937 937
938 down_read(&current->mm->mmap_sem); 938 down_read(&current->mm->mmap_sem);
939 vma = find_vma(current->mm, addr); 939 vma = find_vma(current->mm, addr);
940 940
941 if (vma == NULL || addr < vma->vm_start || 941 if (vma == NULL || addr < vma->vm_start ||
942 !(vma->vm_flags & VM_PFNMAP)) { 942 !(vma->vm_flags & VM_PFNMAP)) {
943 up_read(&current->mm->mmap_sem); 943 up_read(&current->mm->mmap_sem);
944 get_page(bad_page); 944 get_page(bad_page);
945 return page_to_pfn(bad_page); 945 return page_to_pfn(bad_page);
946 } 946 }
947 947
948 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 948 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
949 up_read(&current->mm->mmap_sem); 949 up_read(&current->mm->mmap_sem);
950 BUG_ON(!kvm_is_mmio_pfn(pfn)); 950 BUG_ON(!kvm_is_mmio_pfn(pfn));
951 } else 951 } else
952 pfn = page_to_pfn(page[0]); 952 pfn = page_to_pfn(page[0]);
953 953
954 return pfn; 954 return pfn;
955 } 955 }
956 956
957 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) 957 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
958 { 958 {
959 unsigned long addr; 959 unsigned long addr;
960 960
961 addr = gfn_to_hva(kvm, gfn); 961 addr = gfn_to_hva(kvm, gfn);
962 if (kvm_is_error_hva(addr)) { 962 if (kvm_is_error_hva(addr)) {
963 get_page(bad_page); 963 get_page(bad_page);
964 return page_to_pfn(bad_page); 964 return page_to_pfn(bad_page);
965 } 965 }
966 966
967 return hva_to_pfn(kvm, addr); 967 return hva_to_pfn(kvm, addr);
968 } 968 }
969 EXPORT_SYMBOL_GPL(gfn_to_pfn); 969 EXPORT_SYMBOL_GPL(gfn_to_pfn);
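A hypothetical kernel-side caller of gfn_to_pfn(), for illustration only (touch_guest_page is not a real function here). The error path above takes a reference on bad_page, so kvm_release_pfn_clean() is called on every path; it skips the put for MMIO pfns.

static int touch_guest_page(struct kvm *kvm, gfn_t gfn)
{
        pfn_t pfn = gfn_to_pfn(kvm, gfn);

        if (is_error_pfn(pfn)) {
                kvm_release_pfn_clean(pfn);
                return -EFAULT;
        }

        /* ... use pfn_to_page(pfn) here ... */

        kvm_release_pfn_clean(pfn);
        return 0;
}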
970 970
971 static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) 971 static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
972 { 972 {
973 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 973 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
974 } 974 }
975 975
976 pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 976 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
977 struct kvm_memory_slot *slot, gfn_t gfn) 977 struct kvm_memory_slot *slot, gfn_t gfn)
978 { 978 {
979 unsigned long addr = gfn_to_hva_memslot(slot, gfn); 979 unsigned long addr = gfn_to_hva_memslot(slot, gfn);
980 return hva_to_pfn(kvm, addr); 980 return hva_to_pfn(kvm, addr);
981 } 981 }
982 982
983 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) 983 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
984 { 984 {
985 pfn_t pfn; 985 pfn_t pfn;
986 986
987 pfn = gfn_to_pfn(kvm, gfn); 987 pfn = gfn_to_pfn(kvm, gfn);
988 if (!kvm_is_mmio_pfn(pfn)) 988 if (!kvm_is_mmio_pfn(pfn))
989 return pfn_to_page(pfn); 989 return pfn_to_page(pfn);
990 990
991 WARN_ON(kvm_is_mmio_pfn(pfn)); 991 WARN_ON(kvm_is_mmio_pfn(pfn));
992 992
993 get_page(bad_page); 993 get_page(bad_page);
994 return bad_page; 994 return bad_page;
995 } 995 }
996 996
997 EXPORT_SYMBOL_GPL(gfn_to_page); 997 EXPORT_SYMBOL_GPL(gfn_to_page);
998 998
999 void kvm_release_page_clean(struct page *page) 999 void kvm_release_page_clean(struct page *page)
1000 { 1000 {
1001 kvm_release_pfn_clean(page_to_pfn(page)); 1001 kvm_release_pfn_clean(page_to_pfn(page));
1002 } 1002 }
1003 EXPORT_SYMBOL_GPL(kvm_release_page_clean); 1003 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
1004 1004
1005 void kvm_release_pfn_clean(pfn_t pfn) 1005 void kvm_release_pfn_clean(pfn_t pfn)
1006 { 1006 {
1007 if (!kvm_is_mmio_pfn(pfn)) 1007 if (!kvm_is_mmio_pfn(pfn))
1008 put_page(pfn_to_page(pfn)); 1008 put_page(pfn_to_page(pfn));
1009 } 1009 }
1010 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); 1010 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
1011 1011
1012 void kvm_release_page_dirty(struct page *page) 1012 void kvm_release_page_dirty(struct page *page)
1013 { 1013 {
1014 kvm_release_pfn_dirty(page_to_pfn(page)); 1014 kvm_release_pfn_dirty(page_to_pfn(page));
1015 } 1015 }
1016 EXPORT_SYMBOL_GPL(kvm_release_page_dirty); 1016 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
1017 1017
1018 void kvm_release_pfn_dirty(pfn_t pfn) 1018 void kvm_release_pfn_dirty(pfn_t pfn)
1019 { 1019 {
1020 kvm_set_pfn_dirty(pfn); 1020 kvm_set_pfn_dirty(pfn);
1021 kvm_release_pfn_clean(pfn); 1021 kvm_release_pfn_clean(pfn);
1022 } 1022 }
1023 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); 1023 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
1024 1024
1025 void kvm_set_page_dirty(struct page *page) 1025 void kvm_set_page_dirty(struct page *page)
1026 { 1026 {
1027 kvm_set_pfn_dirty(page_to_pfn(page)); 1027 kvm_set_pfn_dirty(page_to_pfn(page));
1028 } 1028 }
1029 EXPORT_SYMBOL_GPL(kvm_set_page_dirty); 1029 EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
1030 1030
1031 void kvm_set_pfn_dirty(pfn_t pfn) 1031 void kvm_set_pfn_dirty(pfn_t pfn)
1032 { 1032 {
1033 if (!kvm_is_mmio_pfn(pfn)) { 1033 if (!kvm_is_mmio_pfn(pfn)) {
1034 struct page *page = pfn_to_page(pfn); 1034 struct page *page = pfn_to_page(pfn);
1035 if (!PageReserved(page)) 1035 if (!PageReserved(page))
1036 SetPageDirty(page); 1036 SetPageDirty(page);
1037 } 1037 }
1038 } 1038 }
1039 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); 1039 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
1040 1040
1041 void kvm_set_pfn_accessed(pfn_t pfn) 1041 void kvm_set_pfn_accessed(pfn_t pfn)
1042 { 1042 {
1043 if (!kvm_is_mmio_pfn(pfn)) 1043 if (!kvm_is_mmio_pfn(pfn))
1044 mark_page_accessed(pfn_to_page(pfn)); 1044 mark_page_accessed(pfn_to_page(pfn));
1045 } 1045 }
1046 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); 1046 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
1047 1047
1048 void kvm_get_pfn(pfn_t pfn) 1048 void kvm_get_pfn(pfn_t pfn)
1049 { 1049 {
1050 if (!kvm_is_mmio_pfn(pfn)) 1050 if (!kvm_is_mmio_pfn(pfn))
1051 get_page(pfn_to_page(pfn)); 1051 get_page(pfn_to_page(pfn));
1052 } 1052 }
1053 EXPORT_SYMBOL_GPL(kvm_get_pfn); 1053 EXPORT_SYMBOL_GPL(kvm_get_pfn);
1054 1054
1055 static int next_segment(unsigned long len, int offset) 1055 static int next_segment(unsigned long len, int offset)
1056 { 1056 {
1057 if (len > PAGE_SIZE - offset) 1057 if (len > PAGE_SIZE - offset)
1058 return PAGE_SIZE - offset; 1058 return PAGE_SIZE - offset;
1059 else 1059 else
1060 return len; 1060 return len;
1061 } 1061 }
1062 1062
1063 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, 1063 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
1064 int len) 1064 int len)
1065 { 1065 {
1066 int r; 1066 int r;
1067 unsigned long addr; 1067 unsigned long addr;
1068 1068
1069 addr = gfn_to_hva(kvm, gfn); 1069 addr = gfn_to_hva(kvm, gfn);
1070 if (kvm_is_error_hva(addr)) 1070 if (kvm_is_error_hva(addr))
1071 return -EFAULT; 1071 return -EFAULT;
1072 r = copy_from_user(data, (void __user *)addr + offset, len); 1072 r = copy_from_user(data, (void __user *)addr + offset, len);
1073 if (r) 1073 if (r)
1074 return -EFAULT; 1074 return -EFAULT;
1075 return 0; 1075 return 0;
1076 } 1076 }
1077 EXPORT_SYMBOL_GPL(kvm_read_guest_page); 1077 EXPORT_SYMBOL_GPL(kvm_read_guest_page);
1078 1078
1079 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) 1079 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
1080 { 1080 {
1081 gfn_t gfn = gpa >> PAGE_SHIFT; 1081 gfn_t gfn = gpa >> PAGE_SHIFT;
1082 int seg; 1082 int seg;
1083 int offset = offset_in_page(gpa); 1083 int offset = offset_in_page(gpa);
1084 int ret; 1084 int ret;
1085 1085
1086 while ((seg = next_segment(len, offset)) != 0) { 1086 while ((seg = next_segment(len, offset)) != 0) {
1087 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); 1087 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
1088 if (ret < 0) 1088 if (ret < 0)
1089 return ret; 1089 return ret;
1090 offset = 0; 1090 offset = 0;
1091 len -= seg; 1091 len -= seg;
1092 data += seg; 1092 data += seg;
1093 ++gfn; 1093 ++gfn;
1094 } 1094 }
1095 return 0; 1095 return 0;
1096 } 1096 }
1097 EXPORT_SYMBOL_GPL(kvm_read_guest); 1097 EXPORT_SYMBOL_GPL(kvm_read_guest);
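kvm_read_guest() walks the copy in per-page segments via next_segment(): with 4 KiB pages, a 5000-byte read starting at page offset 3000 becomes a 1096-byte segment followed by a 3904-byte segment from the next gfn. A hypothetical kernel-side caller (read_guest_u64 is not part of this file) would look like:

static int read_guest_u64(struct kvm *kvm, gpa_t gpa, u64 *val)
{
        /* may span two pages if gpa is not 8-byte aligned */
        return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}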
1098 1098
1099 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 1099 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
1100 unsigned long len) 1100 unsigned long len)
1101 { 1101 {
1102 int r; 1102 int r;
1103 unsigned long addr; 1103 unsigned long addr;
1104 gfn_t gfn = gpa >> PAGE_SHIFT; 1104 gfn_t gfn = gpa >> PAGE_SHIFT;
1105 int offset = offset_in_page(gpa); 1105 int offset = offset_in_page(gpa);
1106 1106
1107 addr = gfn_to_hva(kvm, gfn); 1107 addr = gfn_to_hva(kvm, gfn);
1108 if (kvm_is_error_hva(addr)) 1108 if (kvm_is_error_hva(addr))
1109 return -EFAULT; 1109 return -EFAULT;
1110 pagefault_disable(); 1110 pagefault_disable();
1111 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); 1111 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
1112 pagefault_enable(); 1112 pagefault_enable();
1113 if (r) 1113 if (r)
1114 return -EFAULT; 1114 return -EFAULT;
1115 return 0; 1115 return 0;
1116 } 1116 }
1117 EXPORT_SYMBOL(kvm_read_guest_atomic); 1117 EXPORT_SYMBOL(kvm_read_guest_atomic);
1118 1118
1119 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, 1119 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
1120 int offset, int len) 1120 int offset, int len)
1121 { 1121 {
1122 int r; 1122 int r;
1123 unsigned long addr; 1123 unsigned long addr;
1124 1124
1125 addr = gfn_to_hva(kvm, gfn); 1125 addr = gfn_to_hva(kvm, gfn);
1126 if (kvm_is_error_hva(addr)) 1126 if (kvm_is_error_hva(addr))
1127 return -EFAULT; 1127 return -EFAULT;
1128 r = copy_to_user((void __user *)addr + offset, data, len); 1128 r = copy_to_user((void __user *)addr + offset, data, len);
1129 if (r) 1129 if (r)
1130 return -EFAULT; 1130 return -EFAULT;
1131 mark_page_dirty(kvm, gfn); 1131 mark_page_dirty(kvm, gfn);
1132 return 0; 1132 return 0;
1133 } 1133 }
1134 EXPORT_SYMBOL_GPL(kvm_write_guest_page); 1134 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
1135 1135
1136 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 1136 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
1137 unsigned long len) 1137 unsigned long len)
1138 { 1138 {
1139 gfn_t gfn = gpa >> PAGE_SHIFT; 1139 gfn_t gfn = gpa >> PAGE_SHIFT;
1140 int seg; 1140 int seg;
1141 int offset = offset_in_page(gpa); 1141 int offset = offset_in_page(gpa);
1142 int ret; 1142 int ret;
1143 1143
1144 while ((seg = next_segment(len, offset)) != 0) { 1144 while ((seg = next_segment(len, offset)) != 0) {
1145 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); 1145 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
1146 if (ret < 0) 1146 if (ret < 0)
1147 return ret; 1147 return ret;
1148 offset = 0; 1148 offset = 0;
1149 len -= seg; 1149 len -= seg;
1150 data += seg; 1150 data += seg;
1151 ++gfn; 1151 ++gfn;
1152 } 1152 }
1153 return 0; 1153 return 0;
1154 } 1154 }
1155 1155
1156 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) 1156 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
1157 { 1157 {
1158 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len); 1158 return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
1159 } 1159 }
1160 EXPORT_SYMBOL_GPL(kvm_clear_guest_page); 1160 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
1161 1161
1162 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) 1162 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
1163 { 1163 {
1164 gfn_t gfn = gpa >> PAGE_SHIFT; 1164 gfn_t gfn = gpa >> PAGE_SHIFT;
1165 int seg; 1165 int seg;
1166 int offset = offset_in_page(gpa); 1166 int offset = offset_in_page(gpa);
1167 int ret; 1167 int ret;
1168 1168
1169 while ((seg = next_segment(len, offset)) != 0) { 1169 while ((seg = next_segment(len, offset)) != 0) {
1170 ret = kvm_clear_guest_page(kvm, gfn, offset, seg); 1170 ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
1171 if (ret < 0) 1171 if (ret < 0)
1172 return ret; 1172 return ret;
1173 offset = 0; 1173 offset = 0;
1174 len -= seg; 1174 len -= seg;
1175 ++gfn; 1175 ++gfn;
1176 } 1176 }
1177 return 0; 1177 return 0;
1178 } 1178 }
1179 EXPORT_SYMBOL_GPL(kvm_clear_guest); 1179 EXPORT_SYMBOL_GPL(kvm_clear_guest);
1180 1180
1181 void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 1181 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1182 { 1182 {
1183 struct kvm_memory_slot *memslot; 1183 struct kvm_memory_slot *memslot;
1184 1184
1185 gfn = unalias_gfn(kvm, gfn); 1185 gfn = unalias_gfn(kvm, gfn);
1186 memslot = gfn_to_memslot_unaliased(kvm, gfn); 1186 memslot = gfn_to_memslot_unaliased(kvm, gfn);
1187 if (memslot && memslot->dirty_bitmap) { 1187 if (memslot && memslot->dirty_bitmap) {
1188 unsigned long rel_gfn = gfn - memslot->base_gfn; 1188 unsigned long rel_gfn = gfn - memslot->base_gfn;
1189 unsigned long *p = memslot->dirty_bitmap +
1190 rel_gfn / BITS_PER_LONG;
1191 int offset = rel_gfn % BITS_PER_LONG;
1189 1192
1190 /* avoid RMW */ 1193 /* avoid RMW */
1191 if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) 1194 if (!generic_test_le_bit(offset, p))
1192 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); 1195 generic___set_le_bit(offset, p);
1193 } 1196 }
1194 } 1197 }
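The word/offset split added above matters because the underlying __set_bit() family takes its bit number as an int in kernels of this vintage: bit rel_gfn of the bitmap becomes bit (rel_gfn % BITS_PER_LONG) of word (rel_gfn / BITS_PER_LONG), so the value passed to generic___set_le_bit() stays below BITS_PER_LONG. For instance, rel_gfn = 0x100000005 (which no longer fits in an int) becomes word index 0x4000000 with bit offset 5.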
1195 1198
1196 /* 1199 /*
1197 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1200 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
1198 */ 1201 */
1199 void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1202 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1200 { 1203 {
1201 DEFINE_WAIT(wait); 1204 DEFINE_WAIT(wait);
1202 1205
1203 for (;;) { 1206 for (;;) {
1204 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1207 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1205 1208
1206 if (kvm_arch_vcpu_runnable(vcpu)) { 1209 if (kvm_arch_vcpu_runnable(vcpu)) {
1207 set_bit(KVM_REQ_UNHALT, &vcpu->requests); 1210 set_bit(KVM_REQ_UNHALT, &vcpu->requests);
1208 break; 1211 break;
1209 } 1212 }
1210 if (kvm_cpu_has_pending_timer(vcpu)) 1213 if (kvm_cpu_has_pending_timer(vcpu))
1211 break; 1214 break;
1212 if (signal_pending(current)) 1215 if (signal_pending(current))
1213 break; 1216 break;
1214 1217
1215 schedule(); 1218 schedule();
1216 } 1219 }
1217 1220
1218 finish_wait(&vcpu->wq, &wait); 1221 finish_wait(&vcpu->wq, &wait);
1219 } 1222 }
1220 1223
1221 void kvm_resched(struct kvm_vcpu *vcpu) 1224 void kvm_resched(struct kvm_vcpu *vcpu)
1222 { 1225 {
1223 if (!need_resched()) 1226 if (!need_resched())
1224 return; 1227 return;
1225 cond_resched(); 1228 cond_resched();
1226 } 1229 }
1227 EXPORT_SYMBOL_GPL(kvm_resched); 1230 EXPORT_SYMBOL_GPL(kvm_resched);
1228 1231
1229 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) 1232 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
1230 { 1233 {
1231 ktime_t expires; 1234 ktime_t expires;
1232 DEFINE_WAIT(wait); 1235 DEFINE_WAIT(wait);
1233 1236
1234 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1237 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1235 1238
1236 /* Sleep for 100 us, and hope lock-holder got scheduled */ 1239 /* Sleep for 100 us, and hope lock-holder got scheduled */
1237 expires = ktime_add_ns(ktime_get(), 100000UL); 1240 expires = ktime_add_ns(ktime_get(), 100000UL);
1238 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); 1241 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1239 1242
1240 finish_wait(&vcpu->wq, &wait); 1243 finish_wait(&vcpu->wq, &wait);
1241 } 1244 }
1242 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); 1245 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
1243 1246
1244 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1247 static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1245 { 1248 {
1246 struct kvm_vcpu *vcpu = vma->vm_file->private_data; 1249 struct kvm_vcpu *vcpu = vma->vm_file->private_data;
1247 struct page *page; 1250 struct page *page;
1248 1251
1249 if (vmf->pgoff == 0) 1252 if (vmf->pgoff == 0)
1250 page = virt_to_page(vcpu->run); 1253 page = virt_to_page(vcpu->run);
1251 #ifdef CONFIG_X86 1254 #ifdef CONFIG_X86
1252 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) 1255 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
1253 page = virt_to_page(vcpu->arch.pio_data); 1256 page = virt_to_page(vcpu->arch.pio_data);
1254 #endif 1257 #endif
1255 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1258 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1256 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) 1259 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
1257 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); 1260 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
1258 #endif 1261 #endif
1259 else 1262 else
1260 return VM_FAULT_SIGBUS; 1263 return VM_FAULT_SIGBUS;
1261 get_page(page); 1264 get_page(page);
1262 vmf->page = page; 1265 vmf->page = page;
1263 return 0; 1266 return 0;
1264 } 1267 }
1265 1268
1266 static const struct vm_operations_struct kvm_vcpu_vm_ops = { 1269 static const struct vm_operations_struct kvm_vcpu_vm_ops = {
1267 .fault = kvm_vcpu_fault, 1270 .fault = kvm_vcpu_fault,
1268 }; 1271 };
1269 1272
1270 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma) 1273 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
1271 { 1274 {
1272 vma->vm_ops = &kvm_vcpu_vm_ops; 1275 vma->vm_ops = &kvm_vcpu_vm_ops;
1273 return 0; 1276 return 0;
1274 } 1277 }
1275 1278
1276 static int kvm_vcpu_release(struct inode *inode, struct file *filp) 1279 static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1277 { 1280 {
1278 struct kvm_vcpu *vcpu = filp->private_data; 1281 struct kvm_vcpu *vcpu = filp->private_data;
1279 1282
1280 kvm_put_kvm(vcpu->kvm); 1283 kvm_put_kvm(vcpu->kvm);
1281 return 0; 1284 return 0;
1282 } 1285 }
1283 1286
1284 static struct file_operations kvm_vcpu_fops = { 1287 static struct file_operations kvm_vcpu_fops = {
1285 .release = kvm_vcpu_release, 1288 .release = kvm_vcpu_release,
1286 .unlocked_ioctl = kvm_vcpu_ioctl, 1289 .unlocked_ioctl = kvm_vcpu_ioctl,
1287 .compat_ioctl = kvm_vcpu_ioctl, 1290 .compat_ioctl = kvm_vcpu_ioctl,
1288 .mmap = kvm_vcpu_mmap, 1291 .mmap = kvm_vcpu_mmap,
1289 }; 1292 };
1290 1293
1291 /* 1294 /*
1292 * Allocates an inode for the vcpu. 1295 * Allocates an inode for the vcpu.
1293 */ 1296 */
1294 static int create_vcpu_fd(struct kvm_vcpu *vcpu) 1297 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
1295 { 1298 {
1296 return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); 1299 return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
1297 } 1300 }
1298 1301
1299 /* 1302 /*
1300 * Creates some virtual cpus. Good luck creating more than one. 1303 * Creates some virtual cpus. Good luck creating more than one.
1301 */ 1304 */
1302 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) 1305 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
1303 { 1306 {
1304 int r; 1307 int r;
1305 struct kvm_vcpu *vcpu, *v; 1308 struct kvm_vcpu *vcpu, *v;
1306 1309
1307 vcpu = kvm_arch_vcpu_create(kvm, id); 1310 vcpu = kvm_arch_vcpu_create(kvm, id);
1308 if (IS_ERR(vcpu)) 1311 if (IS_ERR(vcpu))
1309 return PTR_ERR(vcpu); 1312 return PTR_ERR(vcpu);
1310 1313
1311 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); 1314 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
1312 1315
1313 r = kvm_arch_vcpu_setup(vcpu); 1316 r = kvm_arch_vcpu_setup(vcpu);
1314 if (r) 1317 if (r)
1315 return r; 1318 return r;
1316 1319
1317 mutex_lock(&kvm->lock); 1320 mutex_lock(&kvm->lock);
1318 if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { 1321 if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
1319 r = -EINVAL; 1322 r = -EINVAL;
1320 goto vcpu_destroy; 1323 goto vcpu_destroy;
1321 } 1324 }
1322 1325
1323 kvm_for_each_vcpu(r, v, kvm) 1326 kvm_for_each_vcpu(r, v, kvm)
1324 if (v->vcpu_id == id) { 1327 if (v->vcpu_id == id) {
1325 r = -EEXIST; 1328 r = -EEXIST;
1326 goto vcpu_destroy; 1329 goto vcpu_destroy;
1327 } 1330 }
1328 1331
1329 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); 1332 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
1330 1333
1331 /* Now it's all set up, let userspace reach it */ 1334 /* Now it's all set up, let userspace reach it */
1332 kvm_get_kvm(kvm); 1335 kvm_get_kvm(kvm);
1333 r = create_vcpu_fd(vcpu); 1336 r = create_vcpu_fd(vcpu);
1334 if (r < 0) { 1337 if (r < 0) {
1335 kvm_put_kvm(kvm); 1338 kvm_put_kvm(kvm);
1336 goto vcpu_destroy; 1339 goto vcpu_destroy;
1337 } 1340 }
1338 1341
1339 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; 1342 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
1340 smp_wmb(); 1343 smp_wmb();
1341 atomic_inc(&kvm->online_vcpus); 1344 atomic_inc(&kvm->online_vcpus);
1342 1345
1343 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1346 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1344 if (kvm->bsp_vcpu_id == id) 1347 if (kvm->bsp_vcpu_id == id)
1345 kvm->bsp_vcpu = vcpu; 1348 kvm->bsp_vcpu = vcpu;
1346 #endif 1349 #endif
1347 mutex_unlock(&kvm->lock); 1350 mutex_unlock(&kvm->lock);
1348 return r; 1351 return r;
1349 1352
1350 vcpu_destroy: 1353 vcpu_destroy:
1351 mutex_unlock(&kvm->lock); 1354 mutex_unlock(&kvm->lock);
1352 kvm_arch_vcpu_destroy(vcpu); 1355 kvm_arch_vcpu_destroy(vcpu);
1353 return r; 1356 return r;
1354 } 1357 }
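A hedged userspace sketch of the other end of this function: the fd returned through create_vcpu_fd() is obtained with KVM_CREATE_VCPU and then mmap()ed so userspace can reach vcpu->run (page offset 0 in kvm_vcpu_fault() above); the mapping size comes from KVM_GET_VCPU_MMAP_SIZE on the /dev/kvm fd. create_and_map_vcpu and the fd parameter names are illustrative.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static struct kvm_run *create_and_map_vcpu(int kvm_fd, int vm_fd, int *vcpu_fd_out)
{
        int vcpu_fd, mmap_size;
        struct kvm_run *run;

        vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);            /* vcpu id 0 */
        if (vcpu_fd < 0)
                return NULL;

        mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
        if (mmap_size < 0)
                return NULL;

        /* page 0 of this mapping is vcpu->run, per kvm_vcpu_fault() */
        run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
        if (run == MAP_FAILED)
                return NULL;

        *vcpu_fd_out = vcpu_fd;
        return run;
}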
1355 1358
1356 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) 1359 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
1357 { 1360 {
1358 if (sigset) { 1361 if (sigset) {
1359 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP)); 1362 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
1360 vcpu->sigset_active = 1; 1363 vcpu->sigset_active = 1;
1361 vcpu->sigset = *sigset; 1364 vcpu->sigset = *sigset;
1362 } else 1365 } else
1363 vcpu->sigset_active = 0; 1366 vcpu->sigset_active = 0;
1364 return 0; 1367 return 0;
1365 } 1368 }
1366 1369
1367 static long kvm_vcpu_ioctl(struct file *filp, 1370 static long kvm_vcpu_ioctl(struct file *filp,
1368 unsigned int ioctl, unsigned long arg) 1371 unsigned int ioctl, unsigned long arg)
1369 { 1372 {
1370 struct kvm_vcpu *vcpu = filp->private_data; 1373 struct kvm_vcpu *vcpu = filp->private_data;
1371 void __user *argp = (void __user *)arg; 1374 void __user *argp = (void __user *)arg;
1372 int r; 1375 int r;
1373 struct kvm_fpu *fpu = NULL; 1376 struct kvm_fpu *fpu = NULL;
1374 struct kvm_sregs *kvm_sregs = NULL; 1377 struct kvm_sregs *kvm_sregs = NULL;
1375 1378
1376 if (vcpu->kvm->mm != current->mm) 1379 if (vcpu->kvm->mm != current->mm)
1377 return -EIO; 1380 return -EIO;
1378 switch (ioctl) { 1381 switch (ioctl) {
1379 case KVM_RUN: 1382 case KVM_RUN:
1380 r = -EINVAL; 1383 r = -EINVAL;
1381 if (arg) 1384 if (arg)
1382 goto out; 1385 goto out;
1383 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 1386 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1384 break; 1387 break;
1385 case KVM_GET_REGS: { 1388 case KVM_GET_REGS: {
1386 struct kvm_regs *kvm_regs; 1389 struct kvm_regs *kvm_regs;
1387 1390
1388 r = -ENOMEM; 1391 r = -ENOMEM;
1389 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1392 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
1390 if (!kvm_regs) 1393 if (!kvm_regs)
1391 goto out; 1394 goto out;
1392 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs); 1395 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
1393 if (r) 1396 if (r)
1394 goto out_free1; 1397 goto out_free1;
1395 r = -EFAULT; 1398 r = -EFAULT;
1396 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs))) 1399 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
1397 goto out_free1; 1400 goto out_free1;
1398 r = 0; 1401 r = 0;
1399 out_free1: 1402 out_free1:
1400 kfree(kvm_regs); 1403 kfree(kvm_regs);
1401 break; 1404 break;
1402 } 1405 }
1403 case KVM_SET_REGS: { 1406 case KVM_SET_REGS: {
1404 struct kvm_regs *kvm_regs; 1407 struct kvm_regs *kvm_regs;
1405 1408
1406 r = -ENOMEM; 1409 r = -ENOMEM;
1407 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); 1410 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
1408 if (!kvm_regs) 1411 if (!kvm_regs)
1409 goto out; 1412 goto out;
1410 r = -EFAULT; 1413 r = -EFAULT;
1411 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) 1414 if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
1412 goto out_free2; 1415 goto out_free2;
1413 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); 1416 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
1414 if (r) 1417 if (r)
1415 goto out_free2; 1418 goto out_free2;
1416 r = 0; 1419 r = 0;
1417 out_free2: 1420 out_free2:
1418 kfree(kvm_regs); 1421 kfree(kvm_regs);
1419 break; 1422 break;
1420 } 1423 }
1421 case KVM_GET_SREGS: { 1424 case KVM_GET_SREGS: {
1422 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1425 kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
1423 r = -ENOMEM; 1426 r = -ENOMEM;
1424 if (!kvm_sregs) 1427 if (!kvm_sregs)
1425 goto out; 1428 goto out;
1426 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); 1429 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
1427 if (r) 1430 if (r)
1428 goto out; 1431 goto out;
1429 r = -EFAULT; 1432 r = -EFAULT;
1430 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) 1433 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
1431 goto out; 1434 goto out;
1432 r = 0; 1435 r = 0;
1433 break; 1436 break;
1434 } 1437 }
1435 case KVM_SET_SREGS: { 1438 case KVM_SET_SREGS: {
1436 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); 1439 kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
1437 r = -ENOMEM; 1440 r = -ENOMEM;
1438 if (!kvm_sregs) 1441 if (!kvm_sregs)
1439 goto out; 1442 goto out;
1440 r = -EFAULT; 1443 r = -EFAULT;
1441 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) 1444 if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
1442 goto out; 1445 goto out;
1443 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); 1446 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
1444 if (r) 1447 if (r)
1445 goto out; 1448 goto out;
1446 r = 0; 1449 r = 0;
1447 break; 1450 break;
1448 } 1451 }
1449 case KVM_GET_MP_STATE: { 1452 case KVM_GET_MP_STATE: {
1450 struct kvm_mp_state mp_state; 1453 struct kvm_mp_state mp_state;
1451 1454
1452 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state); 1455 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
1453 if (r) 1456 if (r)
1454 goto out; 1457 goto out;
1455 r = -EFAULT; 1458 r = -EFAULT;
1456 if (copy_to_user(argp, &mp_state, sizeof mp_state)) 1459 if (copy_to_user(argp, &mp_state, sizeof mp_state))
1457 goto out; 1460 goto out;
1458 r = 0; 1461 r = 0;
1459 break; 1462 break;
1460 } 1463 }
1461 case KVM_SET_MP_STATE: { 1464 case KVM_SET_MP_STATE: {
1462 struct kvm_mp_state mp_state; 1465 struct kvm_mp_state mp_state;
1463 1466
1464 r = -EFAULT; 1467 r = -EFAULT;
1465 if (copy_from_user(&mp_state, argp, sizeof mp_state)) 1468 if (copy_from_user(&mp_state, argp, sizeof mp_state))
1466 goto out; 1469 goto out;
1467 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); 1470 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
1468 if (r) 1471 if (r)
1469 goto out; 1472 goto out;
1470 r = 0; 1473 r = 0;
1471 break; 1474 break;
1472 } 1475 }
1473 case KVM_TRANSLATE: { 1476 case KVM_TRANSLATE: {
1474 struct kvm_translation tr; 1477 struct kvm_translation tr;
1475 1478
1476 r = -EFAULT; 1479 r = -EFAULT;
1477 if (copy_from_user(&tr, argp, sizeof tr)) 1480 if (copy_from_user(&tr, argp, sizeof tr))
1478 goto out; 1481 goto out;
1479 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); 1482 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
1480 if (r) 1483 if (r)
1481 goto out; 1484 goto out;
1482 r = -EFAULT; 1485 r = -EFAULT;
1483 if (copy_to_user(argp, &tr, sizeof tr)) 1486 if (copy_to_user(argp, &tr, sizeof tr))
1484 goto out; 1487 goto out;
1485 r = 0; 1488 r = 0;
1486 break; 1489 break;
1487 } 1490 }
1488 case KVM_SET_GUEST_DEBUG: { 1491 case KVM_SET_GUEST_DEBUG: {
1489 struct kvm_guest_debug dbg; 1492 struct kvm_guest_debug dbg;
1490 1493
1491 r = -EFAULT; 1494 r = -EFAULT;
1492 if (copy_from_user(&dbg, argp, sizeof dbg)) 1495 if (copy_from_user(&dbg, argp, sizeof dbg))
1493 goto out; 1496 goto out;
1494 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); 1497 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
1495 if (r) 1498 if (r)
1496 goto out; 1499 goto out;
1497 r = 0; 1500 r = 0;
1498 break; 1501 break;
1499 } 1502 }
1500 case KVM_SET_SIGNAL_MASK: { 1503 case KVM_SET_SIGNAL_MASK: {
1501 struct kvm_signal_mask __user *sigmask_arg = argp; 1504 struct kvm_signal_mask __user *sigmask_arg = argp;
1502 struct kvm_signal_mask kvm_sigmask; 1505 struct kvm_signal_mask kvm_sigmask;
1503 sigset_t sigset, *p; 1506 sigset_t sigset, *p;
1504 1507
1505 p = NULL; 1508 p = NULL;
1506 if (argp) { 1509 if (argp) {
1507 r = -EFAULT; 1510 r = -EFAULT;
1508 if (copy_from_user(&kvm_sigmask, argp, 1511 if (copy_from_user(&kvm_sigmask, argp,
1509 sizeof kvm_sigmask)) 1512 sizeof kvm_sigmask))
1510 goto out; 1513 goto out;
1511 r = -EINVAL; 1514 r = -EINVAL;
1512 if (kvm_sigmask.len != sizeof sigset) 1515 if (kvm_sigmask.len != sizeof sigset)
1513 goto out; 1516 goto out;
1514 r = -EFAULT; 1517 r = -EFAULT;
1515 if (copy_from_user(&sigset, sigmask_arg->sigset, 1518 if (copy_from_user(&sigset, sigmask_arg->sigset,
1516 sizeof sigset)) 1519 sizeof sigset))
1517 goto out; 1520 goto out;
1518 p = &sigset; 1521 p = &sigset;
1519 } 1522 }
1520 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 1523 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
1521 break; 1524 break;
1522 } 1525 }
1523 case KVM_GET_FPU: { 1526 case KVM_GET_FPU: {
1524 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1527 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
1525 r = -ENOMEM; 1528 r = -ENOMEM;
1526 if (!fpu) 1529 if (!fpu)
1527 goto out; 1530 goto out;
1528 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); 1531 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
1529 if (r) 1532 if (r)
1530 goto out; 1533 goto out;
1531 r = -EFAULT; 1534 r = -EFAULT;
1532 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) 1535 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
1533 goto out; 1536 goto out;
1534 r = 0; 1537 r = 0;
1535 break; 1538 break;
1536 } 1539 }
1537 case KVM_SET_FPU: { 1540 case KVM_SET_FPU: {
1538 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); 1541 fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
1539 r = -ENOMEM; 1542 r = -ENOMEM;
1540 if (!fpu) 1543 if (!fpu)
1541 goto out; 1544 goto out;
1542 r = -EFAULT; 1545 r = -EFAULT;
1543 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) 1546 if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
1544 goto out; 1547 goto out;
1545 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); 1548 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
1546 if (r) 1549 if (r)
1547 goto out; 1550 goto out;
1548 r = 0; 1551 r = 0;
1549 break; 1552 break;
1550 } 1553 }
1551 default: 1554 default:
1552 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); 1555 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
1553 } 1556 }
1554 out: 1557 out:
1555 kfree(fpu); 1558 kfree(fpu);
1556 kfree(kvm_sregs); 1559 kfree(kvm_sregs);
1557 return r; 1560 return r;
1558 } 1561 }
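To complete the picture, a minimal userspace fragment for the KVM_RUN case handled above; note the handler rejects any non-zero argument, so the ioctl is issued with 0. run_once is a hypothetical helper and "run" is the kvm_run page mapped earlier.

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int run_once(int vcpu_fd, struct kvm_run *run)
{
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                return -1;

        /* exit_reason tells userspace why the guest stopped (e.g. KVM_EXIT_IO) */
        return run->exit_reason;
}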
1559 1562
1560 static long kvm_vm_ioctl(struct file *filp, 1563 static long kvm_vm_ioctl(struct file *filp,
1561 unsigned int ioctl, unsigned long arg) 1564 unsigned int ioctl, unsigned long arg)
1562 { 1565 {
1563 struct kvm *kvm = filp->private_data; 1566 struct kvm *kvm = filp->private_data;
1564 void __user *argp = (void __user *)arg; 1567 void __user *argp = (void __user *)arg;
1565 int r; 1568 int r;
1566 1569
1567 if (kvm->mm != current->mm) 1570 if (kvm->mm != current->mm)
1568 return -EIO; 1571 return -EIO;
1569 switch (ioctl) { 1572 switch (ioctl) {
1570 case KVM_CREATE_VCPU: 1573 case KVM_CREATE_VCPU:
1571 r = kvm_vm_ioctl_create_vcpu(kvm, arg); 1574 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
1572 if (r < 0) 1575 if (r < 0)
1573 goto out; 1576 goto out;
1574 break; 1577 break;
1575 case KVM_SET_USER_MEMORY_REGION: { 1578 case KVM_SET_USER_MEMORY_REGION: {
1576 struct kvm_userspace_memory_region kvm_userspace_mem; 1579 struct kvm_userspace_memory_region kvm_userspace_mem;
1577 1580
1578 r = -EFAULT; 1581 r = -EFAULT;
1579 if (copy_from_user(&kvm_userspace_mem, argp, 1582 if (copy_from_user(&kvm_userspace_mem, argp,
1580 sizeof kvm_userspace_mem)) 1583 sizeof kvm_userspace_mem))
1581 goto out; 1584 goto out;
1582 1585
1583 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); 1586 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1);
1584 if (r) 1587 if (r)
1585 goto out; 1588 goto out;
1586 break; 1589 break;
1587 } 1590 }
1588 case KVM_GET_DIRTY_LOG: { 1591 case KVM_GET_DIRTY_LOG: {
1589 struct kvm_dirty_log log; 1592 struct kvm_dirty_log log;
1590 1593
1591 r = -EFAULT; 1594 r = -EFAULT;
1592 if (copy_from_user(&log, argp, sizeof log)) 1595 if (copy_from_user(&log, argp, sizeof log))
1593 goto out; 1596 goto out;
1594 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1597 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1595 if (r) 1598 if (r)
1596 goto out; 1599 goto out;
1597 break; 1600 break;
1598 } 1601 }
1599 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1602 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1600 case KVM_REGISTER_COALESCED_MMIO: { 1603 case KVM_REGISTER_COALESCED_MMIO: {
1601 struct kvm_coalesced_mmio_zone zone; 1604 struct kvm_coalesced_mmio_zone zone;
1602 r = -EFAULT; 1605 r = -EFAULT;
1603 if (copy_from_user(&zone, argp, sizeof zone)) 1606 if (copy_from_user(&zone, argp, sizeof zone))
1604 goto out; 1607 goto out;
1605 r = -ENXIO; 1608 r = -ENXIO;
1606 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1609 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
1607 if (r) 1610 if (r)
1608 goto out; 1611 goto out;
1609 r = 0; 1612 r = 0;
1610 break; 1613 break;
1611 } 1614 }
1612 case KVM_UNREGISTER_COALESCED_MMIO: { 1615 case KVM_UNREGISTER_COALESCED_MMIO: {
1613 struct kvm_coalesced_mmio_zone zone; 1616 struct kvm_coalesced_mmio_zone zone;
1614 r = -EFAULT; 1617 r = -EFAULT;
1615 if (copy_from_user(&zone, argp, sizeof zone)) 1618 if (copy_from_user(&zone, argp, sizeof zone))
1616 goto out; 1619 goto out;
1617 r = -ENXIO; 1620 r = -ENXIO;
1618 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1621 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
1619 if (r) 1622 if (r)
1620 goto out; 1623 goto out;
1621 r = 0; 1624 r = 0;
1622 break; 1625 break;
1623 } 1626 }
1624 #endif 1627 #endif
1625 case KVM_IRQFD: { 1628 case KVM_IRQFD: {
1626 struct kvm_irqfd data; 1629 struct kvm_irqfd data;
1627 1630
1628 r = -EFAULT; 1631 r = -EFAULT;
1629 if (copy_from_user(&data, argp, sizeof data)) 1632 if (copy_from_user(&data, argp, sizeof data))
1630 goto out; 1633 goto out;
1631 r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); 1634 r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
1632 break; 1635 break;
1633 } 1636 }
1634 case KVM_IOEVENTFD: { 1637 case KVM_IOEVENTFD: {
1635 struct kvm_ioeventfd data; 1638 struct kvm_ioeventfd data;
1636 1639
1637 r = -EFAULT; 1640 r = -EFAULT;
1638 if (copy_from_user(&data, argp, sizeof data)) 1641 if (copy_from_user(&data, argp, sizeof data))
1639 goto out; 1642 goto out;
1640 r = kvm_ioeventfd(kvm, &data); 1643 r = kvm_ioeventfd(kvm, &data);
1641 break; 1644 break;
1642 } 1645 }
1643 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1646 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1644 case KVM_SET_BOOT_CPU_ID: 1647 case KVM_SET_BOOT_CPU_ID:
1645 r = 0; 1648 r = 0;
1646 mutex_lock(&kvm->lock); 1649 mutex_lock(&kvm->lock);
1647 if (atomic_read(&kvm->online_vcpus) != 0) 1650 if (atomic_read(&kvm->online_vcpus) != 0)
1648 r = -EBUSY; 1651 r = -EBUSY;
1649 else 1652 else
1650 kvm->bsp_vcpu_id = arg; 1653 kvm->bsp_vcpu_id = arg;
1651 mutex_unlock(&kvm->lock); 1654 mutex_unlock(&kvm->lock);
1652 break; 1655 break;
1653 #endif 1656 #endif
1654 default: 1657 default:
1655 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 1658 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
1656 if (r == -ENOTTY) 1659 if (r == -ENOTTY)
1657 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); 1660 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
1658 } 1661 }
1659 out: 1662 out:
1660 return r; 1663 return r;
1661 } 1664 }
1662 1665
1663 #ifdef CONFIG_COMPAT 1666 #ifdef CONFIG_COMPAT
1664 struct compat_kvm_dirty_log { 1667 struct compat_kvm_dirty_log {
1665 __u32 slot; 1668 __u32 slot;
1666 __u32 padding1; 1669 __u32 padding1;
1667 union { 1670 union {
1668 compat_uptr_t dirty_bitmap; /* one bit per page */ 1671 compat_uptr_t dirty_bitmap; /* one bit per page */
1669 __u64 padding2; 1672 __u64 padding2;
1670 }; 1673 };
1671 }; 1674 };
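For comparison, the native definition that this compat variant mirrors carries the bitmap as a full pointer, which is why a 32-bit process needs the compat_uptr_t layout above plus the compat_ptr() widening in kvm_vm_compat_ioctl() below. Roughly (from include/linux/kvm.h, not part of this diff):

struct kvm_dirty_log {
        __u32 slot;
        __u32 padding1;
        union {
                void __user *dirty_bitmap; /* one bit per page */
                __u64 padding2;
        };
};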
1672 1675
1673 static long kvm_vm_compat_ioctl(struct file *filp, 1676 static long kvm_vm_compat_ioctl(struct file *filp,
1674 unsigned int ioctl, unsigned long arg) 1677 unsigned int ioctl, unsigned long arg)
1675 { 1678 {
1676 struct kvm *kvm = filp->private_data; 1679 struct kvm *kvm = filp->private_data;
1677 int r; 1680 int r;
1678 1681
1679 if (kvm->mm != current->mm) 1682 if (kvm->mm != current->mm)
1680 return -EIO; 1683 return -EIO;
1681 switch (ioctl) { 1684 switch (ioctl) {
1682 case KVM_GET_DIRTY_LOG: { 1685 case KVM_GET_DIRTY_LOG: {
1683 struct compat_kvm_dirty_log compat_log; 1686 struct compat_kvm_dirty_log compat_log;
1684 struct kvm_dirty_log log; 1687 struct kvm_dirty_log log;
1685 1688
1686 r = -EFAULT; 1689 r = -EFAULT;
1687 if (copy_from_user(&compat_log, (void __user *)arg, 1690 if (copy_from_user(&compat_log, (void __user *)arg,
1688 sizeof(compat_log))) 1691 sizeof(compat_log)))
1689 goto out; 1692 goto out;
1690 log.slot = compat_log.slot; 1693 log.slot = compat_log.slot;
1691 log.padding1 = compat_log.padding1; 1694 log.padding1 = compat_log.padding1;
1692 log.padding2 = compat_log.padding2; 1695 log.padding2 = compat_log.padding2;
1693 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); 1696 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
1694 1697
1695 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 1698 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
1696 if (r) 1699 if (r)
1697 goto out; 1700 goto out;
1698 break; 1701 break;
1699 } 1702 }
1700 default: 1703 default:
1701 r = kvm_vm_ioctl(filp, ioctl, arg); 1704 r = kvm_vm_ioctl(filp, ioctl, arg);
1702 } 1705 }
1703 1706
1704 out: 1707 out:
1705 return r; 1708 return r;
1706 } 1709 }
1707 #endif 1710 #endif
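The compat path above exists because a 32-bit process passes a 32-bit dirty_bitmap pointer, which compat_ptr() widens before the normal kvm_vm_ioctl_get_dirty_log() runs; the buffer it points at is still one bit per page of the slot, which is exactly the quantity this commit moves to long arithmetic. A rough userspace-side sketch (the slot id and page count are assumptions, not values from the patch):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: size the bitmap with unsigned long arithmetic
 * (one bit per page, rounded up to a 64-bit long) so a very large
 * slot cannot overflow an int. */
static int fetch_dirty_log(int vm_fd, unsigned long npages)
{
	struct kvm_dirty_log log = { 0 };
	unsigned long bytes = ((npages + 63) / 64) * 8;

	log.slot = 0;				/* assumed slot id */
	log.dirty_bitmap = calloc(1, bytes);
	if (!log.dirty_bitmap)
		return -1;
	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);	/* caller scans and frees the bitmap */
}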
1708 1711
1709 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1712 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1710 { 1713 {
1711 struct page *page[1]; 1714 struct page *page[1];
1712 unsigned long addr; 1715 unsigned long addr;
1713 int npages; 1716 int npages;
1714 gfn_t gfn = vmf->pgoff; 1717 gfn_t gfn = vmf->pgoff;
1715 struct kvm *kvm = vma->vm_file->private_data; 1718 struct kvm *kvm = vma->vm_file->private_data;
1716 1719
1717 addr = gfn_to_hva(kvm, gfn); 1720 addr = gfn_to_hva(kvm, gfn);
1718 if (kvm_is_error_hva(addr)) 1721 if (kvm_is_error_hva(addr))
1719 return VM_FAULT_SIGBUS; 1722 return VM_FAULT_SIGBUS;
1720 1723
1721 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, 1724 npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
1722 NULL); 1725 NULL);
1723 if (unlikely(npages != 1)) 1726 if (unlikely(npages != 1))
1724 return VM_FAULT_SIGBUS; 1727 return VM_FAULT_SIGBUS;
1725 1728
1726 vmf->page = page[0]; 1729 vmf->page = page[0];
1727 return 0; 1730 return 0;
1728 } 1731 }
1729 1732
1730 static const struct vm_operations_struct kvm_vm_vm_ops = { 1733 static const struct vm_operations_struct kvm_vm_vm_ops = {
1731 .fault = kvm_vm_fault, 1734 .fault = kvm_vm_fault,
1732 }; 1735 };
1733 1736
1734 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) 1737 static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
1735 { 1738 {
1736 vma->vm_ops = &kvm_vm_vm_ops; 1739 vma->vm_ops = &kvm_vm_vm_ops;
1737 return 0; 1740 return 0;
1738 } 1741 }
1739 1742
1740 static struct file_operations kvm_vm_fops = { 1743 static struct file_operations kvm_vm_fops = {
1741 .release = kvm_vm_release, 1744 .release = kvm_vm_release,
1742 .unlocked_ioctl = kvm_vm_ioctl, 1745 .unlocked_ioctl = kvm_vm_ioctl,
1743 #ifdef CONFIG_COMPAT 1746 #ifdef CONFIG_COMPAT
1744 .compat_ioctl = kvm_vm_compat_ioctl, 1747 .compat_ioctl = kvm_vm_compat_ioctl,
1745 #endif 1748 #endif
1746 .mmap = kvm_vm_mmap, 1749 .mmap = kvm_vm_mmap,
1747 }; 1750 };
1748 1751
1749 static int kvm_dev_ioctl_create_vm(void) 1752 static int kvm_dev_ioctl_create_vm(void)
1750 { 1753 {
1751 int fd; 1754 int fd;
1752 struct kvm *kvm; 1755 struct kvm *kvm;
1753 1756
1754 kvm = kvm_create_vm(); 1757 kvm = kvm_create_vm();
1755 if (IS_ERR(kvm)) 1758 if (IS_ERR(kvm))
1756 return PTR_ERR(kvm); 1759 return PTR_ERR(kvm);
1757 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); 1760 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
1758 if (fd < 0) 1761 if (fd < 0)
1759 kvm_put_kvm(kvm); 1762 kvm_put_kvm(kvm);
1760 1763
1761 return fd; 1764 return fd;
1762 } 1765 }
1763 1766
1764 static long kvm_dev_ioctl_check_extension_generic(long arg) 1767 static long kvm_dev_ioctl_check_extension_generic(long arg)
1765 { 1768 {
1766 switch (arg) { 1769 switch (arg) {
1767 case KVM_CAP_USER_MEMORY: 1770 case KVM_CAP_USER_MEMORY:
1768 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 1771 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
1769 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: 1772 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
1770 #ifdef CONFIG_KVM_APIC_ARCHITECTURE 1773 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
1771 case KVM_CAP_SET_BOOT_CPU_ID: 1774 case KVM_CAP_SET_BOOT_CPU_ID:
1772 #endif 1775 #endif
1773 case KVM_CAP_INTERNAL_ERROR_DATA: 1776 case KVM_CAP_INTERNAL_ERROR_DATA:
1774 return 1; 1777 return 1;
1775 #ifdef CONFIG_HAVE_KVM_IRQCHIP 1778 #ifdef CONFIG_HAVE_KVM_IRQCHIP
1776 case KVM_CAP_IRQ_ROUTING: 1779 case KVM_CAP_IRQ_ROUTING:
1777 return KVM_MAX_IRQ_ROUTES; 1780 return KVM_MAX_IRQ_ROUTES;
1778 #endif 1781 #endif
1779 default: 1782 default:
1780 break; 1783 break;
1781 } 1784 }
1782 return kvm_dev_ioctl_check_extension(arg); 1785 return kvm_dev_ioctl_check_extension(arg);
1783 } 1786 }
1784 1787
1785 static long kvm_dev_ioctl(struct file *filp, 1788 static long kvm_dev_ioctl(struct file *filp,
1786 unsigned int ioctl, unsigned long arg) 1789 unsigned int ioctl, unsigned long arg)
1787 { 1790 {
1788 long r = -EINVAL; 1791 long r = -EINVAL;
1789 1792
1790 switch (ioctl) { 1793 switch (ioctl) {
1791 case KVM_GET_API_VERSION: 1794 case KVM_GET_API_VERSION:
1792 r = -EINVAL; 1795 r = -EINVAL;
1793 if (arg) 1796 if (arg)
1794 goto out; 1797 goto out;
1795 r = KVM_API_VERSION; 1798 r = KVM_API_VERSION;
1796 break; 1799 break;
1797 case KVM_CREATE_VM: 1800 case KVM_CREATE_VM:
1798 r = -EINVAL; 1801 r = -EINVAL;
1799 if (arg) 1802 if (arg)
1800 goto out; 1803 goto out;
1801 r = kvm_dev_ioctl_create_vm(); 1804 r = kvm_dev_ioctl_create_vm();
1802 break; 1805 break;
1803 case KVM_CHECK_EXTENSION: 1806 case KVM_CHECK_EXTENSION:
1804 r = kvm_dev_ioctl_check_extension_generic(arg); 1807 r = kvm_dev_ioctl_check_extension_generic(arg);
1805 break; 1808 break;
1806 case KVM_GET_VCPU_MMAP_SIZE: 1809 case KVM_GET_VCPU_MMAP_SIZE:
1807 r = -EINVAL; 1810 r = -EINVAL;
1808 if (arg) 1811 if (arg)
1809 goto out; 1812 goto out;
1810 r = PAGE_SIZE; /* struct kvm_run */ 1813 r = PAGE_SIZE; /* struct kvm_run */
1811 #ifdef CONFIG_X86 1814 #ifdef CONFIG_X86
1812 r += PAGE_SIZE; /* pio data page */ 1815 r += PAGE_SIZE; /* pio data page */
1813 #endif 1816 #endif
1814 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 1817 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1815 r += PAGE_SIZE; /* coalesced mmio ring page */ 1818 r += PAGE_SIZE; /* coalesced mmio ring page */
1816 #endif 1819 #endif
1817 break; 1820 break;
1818 case KVM_TRACE_ENABLE: 1821 case KVM_TRACE_ENABLE:
1819 case KVM_TRACE_PAUSE: 1822 case KVM_TRACE_PAUSE:
1820 case KVM_TRACE_DISABLE: 1823 case KVM_TRACE_DISABLE:
1821 r = -EOPNOTSUPP; 1824 r = -EOPNOTSUPP;
1822 break; 1825 break;
1823 default: 1826 default:
1824 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1827 return kvm_arch_dev_ioctl(filp, ioctl, arg);
1825 } 1828 }
1826 out: 1829 out:
1827 return r; 1830 return r;
1828 } 1831 }
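kvm_dev_ioctl() is reached through the /dev/kvm character device registered below, and KVM_CREATE_VM hands back the anonymous-inode fd created by kvm_dev_ioctl_create_vm(), whose file_operations are kvm_vm_fops. A short userspace sketch of that chain (error paths and fd cleanup omitted):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch of the fd hierarchy: /dev/kvm -> VM fd -> per-VM ioctls. */
static int create_vm(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (kvm_fd < 0)
		return -1;
	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
		return -1;
	/* Dispatched to kvm_dev_ioctl_create_vm(); arg must be 0. */
	return ioctl(kvm_fd, KVM_CREATE_VM, 0);
}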
1829 1832
1830 static struct file_operations kvm_chardev_ops = { 1833 static struct file_operations kvm_chardev_ops = {
1831 .unlocked_ioctl = kvm_dev_ioctl, 1834 .unlocked_ioctl = kvm_dev_ioctl,
1832 .compat_ioctl = kvm_dev_ioctl, 1835 .compat_ioctl = kvm_dev_ioctl,
1833 }; 1836 };
1834 1837
1835 static struct miscdevice kvm_dev = { 1838 static struct miscdevice kvm_dev = {
1836 KVM_MINOR, 1839 KVM_MINOR,
1837 "kvm", 1840 "kvm",
1838 &kvm_chardev_ops, 1841 &kvm_chardev_ops,
1839 }; 1842 };
1840 1843
1841 static void hardware_enable(void *junk) 1844 static void hardware_enable(void *junk)
1842 { 1845 {
1843 int cpu = raw_smp_processor_id(); 1846 int cpu = raw_smp_processor_id();
1844 int r; 1847 int r;
1845 1848
1846 if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1849 if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
1847 return; 1850 return;
1848 1851
1849 cpumask_set_cpu(cpu, cpus_hardware_enabled); 1852 cpumask_set_cpu(cpu, cpus_hardware_enabled);
1850 1853
1851 r = kvm_arch_hardware_enable(NULL); 1854 r = kvm_arch_hardware_enable(NULL);
1852 1855
1853 if (r) { 1856 if (r) {
1854 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1857 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1855 atomic_inc(&hardware_enable_failed); 1858 atomic_inc(&hardware_enable_failed);
1856 printk(KERN_INFO "kvm: enabling virtualization on " 1859 printk(KERN_INFO "kvm: enabling virtualization on "
1857 "CPU%d failed\n", cpu); 1860 "CPU%d failed\n", cpu);
1858 } 1861 }
1859 } 1862 }
1860 1863
1861 static void hardware_disable(void *junk) 1864 static void hardware_disable(void *junk)
1862 { 1865 {
1863 int cpu = raw_smp_processor_id(); 1866 int cpu = raw_smp_processor_id();
1864 1867
1865 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 1868 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
1866 return; 1869 return;
1867 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 1870 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
1868 kvm_arch_hardware_disable(NULL); 1871 kvm_arch_hardware_disable(NULL);
1869 } 1872 }
1870 1873
1871 static void hardware_disable_all_nolock(void) 1874 static void hardware_disable_all_nolock(void)
1872 { 1875 {
1873 BUG_ON(!kvm_usage_count); 1876 BUG_ON(!kvm_usage_count);
1874 1877
1875 kvm_usage_count--; 1878 kvm_usage_count--;
1876 if (!kvm_usage_count) 1879 if (!kvm_usage_count)
1877 on_each_cpu(hardware_disable, NULL, 1); 1880 on_each_cpu(hardware_disable, NULL, 1);
1878 } 1881 }
1879 1882
1880 static void hardware_disable_all(void) 1883 static void hardware_disable_all(void)
1881 { 1884 {
1882 spin_lock(&kvm_lock); 1885 spin_lock(&kvm_lock);
1883 hardware_disable_all_nolock(); 1886 hardware_disable_all_nolock();
1884 spin_unlock(&kvm_lock); 1887 spin_unlock(&kvm_lock);
1885 } 1888 }
1886 1889
1887 static int hardware_enable_all(void) 1890 static int hardware_enable_all(void)
1888 { 1891 {
1889 int r = 0; 1892 int r = 0;
1890 1893
1891 spin_lock(&kvm_lock); 1894 spin_lock(&kvm_lock);
1892 1895
1893 kvm_usage_count++; 1896 kvm_usage_count++;
1894 if (kvm_usage_count == 1) { 1897 if (kvm_usage_count == 1) {
1895 atomic_set(&hardware_enable_failed, 0); 1898 atomic_set(&hardware_enable_failed, 0);
1896 on_each_cpu(hardware_enable, NULL, 1); 1899 on_each_cpu(hardware_enable, NULL, 1);
1897 1900
1898 if (atomic_read(&hardware_enable_failed)) { 1901 if (atomic_read(&hardware_enable_failed)) {
1899 hardware_disable_all_nolock(); 1902 hardware_disable_all_nolock();
1900 r = -EBUSY; 1903 r = -EBUSY;
1901 } 1904 }
1902 } 1905 }
1903 1906
1904 spin_unlock(&kvm_lock); 1907 spin_unlock(&kvm_lock);
1905 1908
1906 return r; 1909 return r;
1907 } 1910 }
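hardware_enable_all() and hardware_disable_all() keep a usage count under kvm_lock so virtualization is switched on only when the first VM is created and switched off again when the last one disappears. Reduced to its essentials (the helpers here are stand-ins, not KVM functions), the pattern is:

/* Stand-in hooks, illustrative only. */
static int enable_resource(void) { return 0; }
static void disable_resource(void) { }

static int usage_count;		/* protected by an assumed lock, like kvm_lock above */

static int resource_get(void)
{
	int r = 0;

	if (usage_count++ == 0) {
		r = enable_resource();
		if (r)
			usage_count--;	/* roll back, as hardware_enable_all() does on failure */
	}
	return r;
}

static void resource_put(void)
{
	if (--usage_count == 0)
		disable_resource();
}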
1908 1911
1909 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 1912 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
1910 void *v) 1913 void *v)
1911 { 1914 {
1912 int cpu = (long)v; 1915 int cpu = (long)v;
1913 1916
1914 if (!kvm_usage_count) 1917 if (!kvm_usage_count)
1915 return NOTIFY_OK; 1918 return NOTIFY_OK;
1916 1919
1917 val &= ~CPU_TASKS_FROZEN; 1920 val &= ~CPU_TASKS_FROZEN;
1918 switch (val) { 1921 switch (val) {
1919 case CPU_DYING: 1922 case CPU_DYING:
1920 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1923 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
1921 cpu); 1924 cpu);
1922 hardware_disable(NULL); 1925 hardware_disable(NULL);
1923 break; 1926 break;
1924 case CPU_UP_CANCELED: 1927 case CPU_UP_CANCELED:
1925 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 1928 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
1926 cpu); 1929 cpu);
1927 smp_call_function_single(cpu, hardware_disable, NULL, 1); 1930 smp_call_function_single(cpu, hardware_disable, NULL, 1);
1928 break; 1931 break;
1929 case CPU_ONLINE: 1932 case CPU_ONLINE:
1930 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1933 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
1931 cpu); 1934 cpu);
1932 smp_call_function_single(cpu, hardware_enable, NULL, 1); 1935 smp_call_function_single(cpu, hardware_enable, NULL, 1);
1933 break; 1936 break;
1934 } 1937 }
1935 return NOTIFY_OK; 1938 return NOTIFY_OK;
1936 } 1939 }
1937 1940
1938 1941
1939 asmlinkage void kvm_handle_fault_on_reboot(void) 1942 asmlinkage void kvm_handle_fault_on_reboot(void)
1940 { 1943 {
1941 if (kvm_rebooting) 1944 if (kvm_rebooting)
1942 /* spin while reset goes on */ 1945 /* spin while reset goes on */
1943 while (true) 1946 while (true)
1944 ; 1947 ;
1945 /* Fault while not rebooting. We want the trace. */ 1948 /* Fault while not rebooting. We want the trace. */
1946 BUG(); 1949 BUG();
1947 } 1950 }
1948 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); 1951 EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
1949 1952
1950 static int kvm_reboot(struct notifier_block *notifier, unsigned long val, 1953 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
1951 void *v) 1954 void *v)
1952 { 1955 {
1953 /* 1956 /*
1954 * Some (well, at least mine) BIOSes hang on reboot if 1957 * Some (well, at least mine) BIOSes hang on reboot if
1955 * in vmx root mode. 1958 * in vmx root mode.
1956 * 1959 *
1957 * And Intel TXT requires VMX off on all CPUs when the system shuts down. 1960 * And Intel TXT requires VMX off on all CPUs when the system shuts down.
1958 */ 1961 */
1959 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 1962 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
1960 kvm_rebooting = true; 1963 kvm_rebooting = true;
1961 on_each_cpu(hardware_disable, NULL, 1); 1964 on_each_cpu(hardware_disable, NULL, 1);
1962 return NOTIFY_OK; 1965 return NOTIFY_OK;
1963 } 1966 }
1964 1967
1965 static struct notifier_block kvm_reboot_notifier = { 1968 static struct notifier_block kvm_reboot_notifier = {
1966 .notifier_call = kvm_reboot, 1969 .notifier_call = kvm_reboot,
1967 .priority = 0, 1970 .priority = 0,
1968 }; 1971 };
1969 1972
1970 static void kvm_io_bus_destroy(struct kvm_io_bus *bus) 1973 static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
1971 { 1974 {
1972 int i; 1975 int i;
1973 1976
1974 for (i = 0; i < bus->dev_count; i++) { 1977 for (i = 0; i < bus->dev_count; i++) {
1975 struct kvm_io_device *pos = bus->devs[i]; 1978 struct kvm_io_device *pos = bus->devs[i];
1976 1979
1977 kvm_iodevice_destructor(pos); 1980 kvm_iodevice_destructor(pos);
1978 } 1981 }
1979 kfree(bus); 1982 kfree(bus);
1980 } 1983 }
1981 1984
1982 /* kvm_io_bus_write - called under kvm->slots_lock */ 1985 /* kvm_io_bus_write - called under kvm->slots_lock */
1983 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 1986 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
1984 int len, const void *val) 1987 int len, const void *val)
1985 { 1988 {
1986 int i; 1989 int i;
1987 struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 1990 struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
1988 for (i = 0; i < bus->dev_count; i++) 1991 for (i = 0; i < bus->dev_count; i++)
1989 if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) 1992 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
1990 return 0; 1993 return 0;
1991 return -EOPNOTSUPP; 1994 return -EOPNOTSUPP;
1992 } 1995 }
1993 1996
1994 /* kvm_io_bus_read - called under kvm->slots_lock */ 1997 /* kvm_io_bus_read - called under kvm->slots_lock */
1995 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 1998 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
1996 int len, void *val) 1999 int len, void *val)
1997 { 2000 {
1998 int i; 2001 int i;
1999 struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 2002 struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
2000 2003
2001 for (i = 0; i < bus->dev_count; i++) 2004 for (i = 0; i < bus->dev_count; i++)
2002 if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) 2005 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
2003 return 0; 2006 return 0;
2004 return -EOPNOTSUPP; 2007 return -EOPNOTSUPP;
2005 } 2008 }
2006 2009
2007 /* Caller must hold slots_lock. */ 2010 /* Caller must hold slots_lock. */
2008 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, 2011 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2009 struct kvm_io_device *dev) 2012 struct kvm_io_device *dev)
2010 { 2013 {
2011 struct kvm_io_bus *new_bus, *bus; 2014 struct kvm_io_bus *new_bus, *bus;
2012 2015
2013 bus = kvm->buses[bus_idx]; 2016 bus = kvm->buses[bus_idx];
2014 if (bus->dev_count > NR_IOBUS_DEVS-1) 2017 if (bus->dev_count > NR_IOBUS_DEVS-1)
2015 return -ENOSPC; 2018 return -ENOSPC;
2016 2019
2017 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 2020 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
2018 if (!new_bus) 2021 if (!new_bus)
2019 return -ENOMEM; 2022 return -ENOMEM;
2020 memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 2023 memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
2021 new_bus->devs[new_bus->dev_count++] = dev; 2024 new_bus->devs[new_bus->dev_count++] = dev;
2022 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2025 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2023 synchronize_srcu_expedited(&kvm->srcu); 2026 synchronize_srcu_expedited(&kvm->srcu);
2024 kfree(bus); 2027 kfree(bus);
2025 2028
2026 return 0; 2029 return 0;
2027 } 2030 }
2028 2031
2029 /* Caller must hold slots_lock. */ 2032 /* Caller must hold slots_lock. */
2030 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, 2033 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
2031 struct kvm_io_device *dev) 2034 struct kvm_io_device *dev)
2032 { 2035 {
2033 int i, r; 2036 int i, r;
2034 struct kvm_io_bus *new_bus, *bus; 2037 struct kvm_io_bus *new_bus, *bus;
2035 2038
2036 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); 2039 new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
2037 if (!new_bus) 2040 if (!new_bus)
2038 return -ENOMEM; 2041 return -ENOMEM;
2039 2042
2040 bus = kvm->buses[bus_idx]; 2043 bus = kvm->buses[bus_idx];
2041 memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); 2044 memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
2042 2045
2043 r = -ENOENT; 2046 r = -ENOENT;
2044 for (i = 0; i < new_bus->dev_count; i++) 2047 for (i = 0; i < new_bus->dev_count; i++)
2045 if (new_bus->devs[i] == dev) { 2048 if (new_bus->devs[i] == dev) {
2046 r = 0; 2049 r = 0;
2047 new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; 2050 new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
2048 break; 2051 break;
2049 } 2052 }
2050 2053
2051 if (r) { 2054 if (r) {
2052 kfree(new_bus); 2055 kfree(new_bus);
2053 return r; 2056 return r;
2054 } 2057 }
2055 2058
2056 rcu_assign_pointer(kvm->buses[bus_idx], new_bus); 2059 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
2057 synchronize_srcu_expedited(&kvm->srcu); 2060 synchronize_srcu_expedited(&kvm->srcu);
2058 kfree(bus); 2061 kfree(bus);
2059 return r; 2062 return r;
2060 } 2063 }
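Both register and unregister above follow the same read-copy-update discipline: allocate a copy of the bus, modify the copy, publish it with rcu_assign_pointer(), wait out existing readers with synchronize_srcu_expedited(), and only then free the old array; the reader side is the rcu_dereference() in kvm_io_bus_write()/kvm_io_bus_read(). A stripped-down sketch of the writer half, using plain RCU rather than KVM's SRCU and a single pointer rather than a device array:

#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cfg { int val; };
static struct cfg *cur_cfg;	/* readers use rcu_dereference(cur_cfg) */

/* Writer side, assumed to run under an external mutex (like slots_lock). */
static int update_cfg(int new_val)
{
	struct cfg *new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL);
	struct cfg *old_cfg = cur_cfg;

	if (!new_cfg)
		return -ENOMEM;
	new_cfg->val = new_val;
	rcu_assign_pointer(cur_cfg, new_cfg);	/* publish the new copy */
	synchronize_rcu();			/* wait for pre-existing readers */
	kfree(old_cfg);				/* old copy is now unreachable */
	return 0;
}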
2061 2064
2062 static struct notifier_block kvm_cpu_notifier = { 2065 static struct notifier_block kvm_cpu_notifier = {
2063 .notifier_call = kvm_cpu_hotplug, 2066 .notifier_call = kvm_cpu_hotplug,
2064 .priority = 20, /* must be > scheduler priority */ 2067 .priority = 20, /* must be > scheduler priority */
2065 }; 2068 };
2066 2069
2067 static int vm_stat_get(void *_offset, u64 *val) 2070 static int vm_stat_get(void *_offset, u64 *val)
2068 { 2071 {
2069 unsigned offset = (long)_offset; 2072 unsigned offset = (long)_offset;
2070 struct kvm *kvm; 2073 struct kvm *kvm;
2071 2074
2072 *val = 0; 2075 *val = 0;
2073 spin_lock(&kvm_lock); 2076 spin_lock(&kvm_lock);
2074 list_for_each_entry(kvm, &vm_list, vm_list) 2077 list_for_each_entry(kvm, &vm_list, vm_list)
2075 *val += *(u32 *)((void *)kvm + offset); 2078 *val += *(u32 *)((void *)kvm + offset);
2076 spin_unlock(&kvm_lock); 2079 spin_unlock(&kvm_lock);
2077 return 0; 2080 return 0;
2078 } 2081 }
2079 2082
2080 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n"); 2083 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
2081 2084
2082 static int vcpu_stat_get(void *_offset, u64 *val) 2085 static int vcpu_stat_get(void *_offset, u64 *val)
2083 { 2086 {
2084 unsigned offset = (long)_offset; 2087 unsigned offset = (long)_offset;
2085 struct kvm *kvm; 2088 struct kvm *kvm;
2086 struct kvm_vcpu *vcpu; 2089 struct kvm_vcpu *vcpu;
2087 int i; 2090 int i;
2088 2091
2089 *val = 0; 2092 *val = 0;
2090 spin_lock(&kvm_lock); 2093 spin_lock(&kvm_lock);
2091 list_for_each_entry(kvm, &vm_list, vm_list) 2094 list_for_each_entry(kvm, &vm_list, vm_list)
2092 kvm_for_each_vcpu(i, vcpu, kvm) 2095 kvm_for_each_vcpu(i, vcpu, kvm)
2093 *val += *(u32 *)((void *)vcpu + offset); 2096 *val += *(u32 *)((void *)vcpu + offset);
2094 2097
2095 spin_unlock(&kvm_lock); 2098 spin_unlock(&kvm_lock);
2096 return 0; 2099 return 0;
2097 } 2100 }
2098 2101
2099 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n"); 2102 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
2100 2103
2101 static const struct file_operations *stat_fops[] = { 2104 static const struct file_operations *stat_fops[] = {
2102 [KVM_STAT_VCPU] = &vcpu_stat_fops, 2105 [KVM_STAT_VCPU] = &vcpu_stat_fops,
2103 [KVM_STAT_VM] = &vm_stat_fops, 2106 [KVM_STAT_VM] = &vm_stat_fops,
2104 }; 2107 };
2105 2108
2106 static void kvm_init_debug(void) 2109 static void kvm_init_debug(void)
2107 { 2110 {
2108 struct kvm_stats_debugfs_item *p; 2111 struct kvm_stats_debugfs_item *p;
2109 2112
2110 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); 2113 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
2111 for (p = debugfs_entries; p->name; ++p) 2114 for (p = debugfs_entries; p->name; ++p)
2112 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, 2115 p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
2113 (void *)(long)p->offset, 2116 (void *)(long)p->offset,
2114 stat_fops[p->kind]); 2117 stat_fops[p->kind]);
2115 } 2118 }
2116 2119
2117 static void kvm_exit_debug(void) 2120 static void kvm_exit_debug(void)
2118 { 2121 {
2119 struct kvm_stats_debugfs_item *p; 2122 struct kvm_stats_debugfs_item *p;
2120 2123
2121 for (p = debugfs_entries; p->name; ++p) 2124 for (p = debugfs_entries; p->name; ++p)
2122 debugfs_remove(p->dentry); 2125 debugfs_remove(p->dentry);
2123 debugfs_remove(kvm_debugfs_dir); 2126 debugfs_remove(kvm_debugfs_dir);
2124 } 2127 }
2125 2128
2126 static int kvm_suspend(struct sys_device *dev, pm_message_t state) 2129 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
2127 { 2130 {
2128 if (kvm_usage_count) 2131 if (kvm_usage_count)
2129 hardware_disable(NULL); 2132 hardware_disable(NULL);
2130 return 0; 2133 return 0;
2131 } 2134 }
2132 2135
2133 static int kvm_resume(struct sys_device *dev) 2136 static int kvm_resume(struct sys_device *dev)
2134 { 2137 {
2135 if (kvm_usage_count) 2138 if (kvm_usage_count)
2136 hardware_enable(NULL); 2139 hardware_enable(NULL);
2137 return 0; 2140 return 0;
2138 } 2141 }
2139 2142
2140 static struct sysdev_class kvm_sysdev_class = { 2143 static struct sysdev_class kvm_sysdev_class = {
2141 .name = "kvm", 2144 .name = "kvm",
2142 .suspend = kvm_suspend, 2145 .suspend = kvm_suspend,
2143 .resume = kvm_resume, 2146 .resume = kvm_resume,
2144 }; 2147 };
2145 2148
2146 static struct sys_device kvm_sysdev = { 2149 static struct sys_device kvm_sysdev = {
2147 .id = 0, 2150 .id = 0,
2148 .cls = &kvm_sysdev_class, 2151 .cls = &kvm_sysdev_class,
2149 }; 2152 };
2150 2153
2151 struct page *bad_page; 2154 struct page *bad_page;
2152 pfn_t bad_pfn; 2155 pfn_t bad_pfn;
2153 2156
2154 static inline 2157 static inline
2155 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) 2158 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
2156 { 2159 {
2157 return container_of(pn, struct kvm_vcpu, preempt_notifier); 2160 return container_of(pn, struct kvm_vcpu, preempt_notifier);
2158 } 2161 }
2159 2162
2160 static void kvm_sched_in(struct preempt_notifier *pn, int cpu) 2163 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
2161 { 2164 {
2162 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 2165 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2163 2166
2164 kvm_arch_vcpu_load(vcpu, cpu); 2167 kvm_arch_vcpu_load(vcpu, cpu);
2165 } 2168 }
2166 2169
2167 static void kvm_sched_out(struct preempt_notifier *pn, 2170 static void kvm_sched_out(struct preempt_notifier *pn,
2168 struct task_struct *next) 2171 struct task_struct *next)
2169 { 2172 {
2170 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 2173 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
2171 2174
2172 kvm_arch_vcpu_put(vcpu); 2175 kvm_arch_vcpu_put(vcpu);
2173 } 2176 }
2174 2177
2175 int kvm_init(void *opaque, unsigned int vcpu_size, 2178 int kvm_init(void *opaque, unsigned int vcpu_size,
2176 struct module *module) 2179 struct module *module)
2177 { 2180 {
2178 int r; 2181 int r;
2179 int cpu; 2182 int cpu;
2180 2183
2181 r = kvm_arch_init(opaque); 2184 r = kvm_arch_init(opaque);
2182 if (r) 2185 if (r)
2183 goto out_fail; 2186 goto out_fail;
2184 2187
2185 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 2188 bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2186 2189
2187 if (bad_page == NULL) { 2190 if (bad_page == NULL) {
2188 r = -ENOMEM; 2191 r = -ENOMEM;
2189 goto out; 2192 goto out;
2190 } 2193 }
2191 2194
2192 bad_pfn = page_to_pfn(bad_page); 2195 bad_pfn = page_to_pfn(bad_page);
2193 2196
2194 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { 2197 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
2195 r = -ENOMEM; 2198 r = -ENOMEM;
2196 goto out_free_0; 2199 goto out_free_0;
2197 } 2200 }
2198 2201
2199 r = kvm_arch_hardware_setup(); 2202 r = kvm_arch_hardware_setup();
2200 if (r < 0) 2203 if (r < 0)
2201 goto out_free_0a; 2204 goto out_free_0a;
2202 2205
2203 for_each_online_cpu(cpu) { 2206 for_each_online_cpu(cpu) {
2204 smp_call_function_single(cpu, 2207 smp_call_function_single(cpu,
2205 kvm_arch_check_processor_compat, 2208 kvm_arch_check_processor_compat,
2206 &r, 1); 2209 &r, 1);
2207 if (r < 0) 2210 if (r < 0)
2208 goto out_free_1; 2211 goto out_free_1;
2209 } 2212 }
2210 2213
2211 r = register_cpu_notifier(&kvm_cpu_notifier); 2214 r = register_cpu_notifier(&kvm_cpu_notifier);
2212 if (r) 2215 if (r)
2213 goto out_free_2; 2216 goto out_free_2;
2214 register_reboot_notifier(&kvm_reboot_notifier); 2217 register_reboot_notifier(&kvm_reboot_notifier);
2215 2218
2216 r = sysdev_class_register(&kvm_sysdev_class); 2219 r = sysdev_class_register(&kvm_sysdev_class);
2217 if (r) 2220 if (r)
2218 goto out_free_3; 2221 goto out_free_3;
2219 2222
2220 r = sysdev_register(&kvm_sysdev); 2223 r = sysdev_register(&kvm_sysdev);
2221 if (r) 2224 if (r)
2222 goto out_free_4; 2225 goto out_free_4;
2223 2226
2224 /* A kmem cache lets us meet the alignment requirements of fx_save. */ 2227 /* A kmem cache lets us meet the alignment requirements of fx_save. */
2225 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, 2228 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
2226 __alignof__(struct kvm_vcpu), 2229 __alignof__(struct kvm_vcpu),
2227 0, NULL); 2230 0, NULL);
2228 if (!kvm_vcpu_cache) { 2231 if (!kvm_vcpu_cache) {
2229 r = -ENOMEM; 2232 r = -ENOMEM;
2230 goto out_free_5; 2233 goto out_free_5;
2231 } 2234 }
2232 2235
2233 kvm_chardev_ops.owner = module; 2236 kvm_chardev_ops.owner = module;
2234 kvm_vm_fops.owner = module; 2237 kvm_vm_fops.owner = module;
2235 kvm_vcpu_fops.owner = module; 2238 kvm_vcpu_fops.owner = module;
2236 2239
2237 r = misc_register(&kvm_dev); 2240 r = misc_register(&kvm_dev);
2238 if (r) { 2241 if (r) {
2239 printk(KERN_ERR "kvm: misc device register failed\n"); 2242 printk(KERN_ERR "kvm: misc device register failed\n");
2240 goto out_free; 2243 goto out_free;
2241 } 2244 }
2242 2245
2243 kvm_preempt_ops.sched_in = kvm_sched_in; 2246 kvm_preempt_ops.sched_in = kvm_sched_in;
2244 kvm_preempt_ops.sched_out = kvm_sched_out; 2247 kvm_preempt_ops.sched_out = kvm_sched_out;
2245 2248
2246 kvm_init_debug(); 2249 kvm_init_debug();
2247 2250
2248 return 0; 2251 return 0;
2249 2252
2250 out_free: 2253 out_free:
2251 kmem_cache_destroy(kvm_vcpu_cache); 2254 kmem_cache_destroy(kvm_vcpu_cache);
2252 out_free_5: 2255 out_free_5:
2253 sysdev_unregister(&kvm_sysdev); 2256 sysdev_unregister(&kvm_sysdev);
2254 out_free_4: 2257 out_free_4:
2255 sysdev_class_unregister(&kvm_sysdev_class); 2258 sysdev_class_unregister(&kvm_sysdev_class);
2256 out_free_3: 2259 out_free_3:
2257 unregister_reboot_notifier(&kvm_reboot_notifier); 2260 unregister_reboot_notifier(&kvm_reboot_notifier);
2258 unregister_cpu_notifier(&kvm_cpu_notifier); 2261 unregister_cpu_notifier(&kvm_cpu_notifier);
2259 out_free_2: 2262 out_free_2:
2260 out_free_1: 2263 out_free_1:
2261 kvm_arch_hardware_unsetup(); 2264 kvm_arch_hardware_unsetup();
2262 out_free_0a: 2265 out_free_0a:
2263 free_cpumask_var(cpus_hardware_enabled); 2266 free_cpumask_var(cpus_hardware_enabled);
2264 out_free_0: 2267 out_free_0:
2265 __free_page(bad_page); 2268 __free_page(bad_page);
2266 out: 2269 out:
2267 kvm_arch_exit(); 2270 kvm_arch_exit();
2268 out_fail: 2271 out_fail:
2269 return r; 2272 return r;
2270 } 2273 }
2271 EXPORT_SYMBOL_GPL(kvm_init); 2274 EXPORT_SYMBOL_GPL(kvm_init);
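kvm_init() unwinds errors with the usual kernel goto ladder: each label undoes only the steps that had already succeeded, in reverse order of setup, so a failure at any point leaves nothing half-initialized. The shape of the idiom, with made-up steps standing in for the real ones:

/* Hypothetical steps, only to show the shape of the unwind ladder. */
static int setup_a(void) { return 0; }
static void undo_a(void) { }
static int setup_b(void) { return 0; }

static int setup_everything(void)
{
	int r;

	r = setup_a();
	if (r)
		goto out;
	r = setup_b();
	if (r)
		goto out_undo_a;	/* unwind in reverse order of setup */
	return 0;

out_undo_a:
	undo_a();
out:
	return r;
}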
2272 2275
2273 void kvm_exit(void) 2276 void kvm_exit(void)
2274 { 2277 {
2275 tracepoint_synchronize_unregister(); 2278 tracepoint_synchronize_unregister();
2276 kvm_exit_debug(); 2279 kvm_exit_debug();
2277 misc_deregister(&kvm_dev); 2280 misc_deregister(&kvm_dev);
2278 kmem_cache_destroy(kvm_vcpu_cache); 2281 kmem_cache_destroy(kvm_vcpu_cache);
2279 sysdev_unregister(&kvm_sysdev); 2282 sysdev_unregister(&kvm_sysdev);
2280 sysdev_class_unregister(&kvm_sysdev_class); 2283 sysdev_class_unregister(&kvm_sysdev_class);
2281 unregister_reboot_notifier(&kvm_reboot_notifier); 2284 unregister_reboot_notifier(&kvm_reboot_notifier);
2282 unregister_cpu_notifier(&kvm_cpu_notifier); 2285 unregister_cpu_notifier(&kvm_cpu_notifier);
2283 on_each_cpu(hardware_disable, NULL, 1); 2286 on_each_cpu(hardware_disable, NULL, 1);
2284 kvm_arch_hardware_unsetup(); 2287 kvm_arch_hardware_unsetup();
2285 kvm_arch_exit(); 2288 kvm_arch_exit();
2286 free_cpumask_var(cpus_hardware_enabled); 2289 free_cpumask_var(cpus_hardware_enabled);
2287 __free_page(bad_page); 2290 __free_page(bad_page);
2288 } 2291 }
2289 EXPORT_SYMBOL_GPL(kvm_exit); 2292 EXPORT_SYMBOL_GPL(kvm_exit);
2290 2293